diff --git a/.gitignore b/.gitignore index db426f86..103a91cf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ - .DS_Store -tests/repo-tests* +test/repo-tests* # Logs logs @@ -31,4 +30,4 @@ build/Release node_modules dist -lib \ No newline at end of file +lib diff --git a/package.json b/package.json index 72e54add..155d9174 100644 --- a/package.json +++ b/package.json @@ -36,28 +36,28 @@ "devDependencies": { "aegir": "^2.1.1", "block-stream2": "^1.1.0", - "brfs": "^1.4.3", "bs58": "^3.0.0", "buffer-loader": "0.0.1", "chai": "^3.5.0", "fs-blob-store": "^5.2.1", - "highland": "^2.7.4", "idb-plus-blob-store": "^1.0.0", "ipfs-repo": "^0.6.1", - "mocha": "^2.3.4", "ncp": "^2.0.0", "pre-commit": "^1.1.2", "raw-loader": "^0.5.1", "rimraf": "^2.5.1", + "streamifier": "^0.1.1", "string-to-stream": "^1.0.1" }, "dependencies": { "async": "^1.5.2", "block-stream2": "^1.1.0", "debug": "^2.2.0", + "eventemitter2": "^1.0.0", "ipfs-blocks": "^0.2.0", "ipfs-merkle-dag": "^0.4.0", "ipfs-unixfs": "^0.1.0", + "readable-stream": "2.0.6", "through2": "^2.0.0" }, "contributors": [ @@ -68,4 +68,4 @@ "greenkeeperio-bot ", "nginnever " ] -} \ No newline at end of file +} diff --git a/src/importer.js b/src/importer.js index 722fd33a..a22d06a5 100644 --- a/src/importer.js +++ b/src/importer.js @@ -3,247 +3,272 @@ const debug = require('debug') const log = debug('importer') log.err = debug('importer:error') -const fs = require('fs') -const mDAG = require('ipfs-merkle-dag') -const FixedSizeChunker = require('./chunker-fixed-size') +const fsc = require('./chunker-fixed-size') const through2 = require('through2') +const merkleDAG = require('ipfs-merkle-dag') const UnixFS = require('ipfs-unixfs') -const async = require('async') +const EE2 = require('eventemitter2').EventEmitter2 +const util = require('util') +const bs58 = require('bs58') -exports = module.exports +exports = module.exports = Importer const CHUNK_SIZE = 262144 -// Use a layout + chunkers to convert a directory (or file) to the layout format -exports.import = (target, dagService, options, callback) => { - if (typeof options === 'function') { callback = options; options = {} } - - if (!target) { return callback(new Error('must specify target')) } - if (!dagService) { return callback(new Error('must specify dag service')) } - - // options.recursive : follow dirs - // options.chunkers : obj with chunkers to each type of data, { default: dumb-chunker } - - options = options || {} - - if (Buffer.isBuffer(target)) { - bufferImporter(target, callback) - } else if (typeof target.on === 'function') { - // TODO Create Stream Importer - // streamImporter(options.stream, callback) - return callback(new Error('stream importer has not been built yet')) - } else if (typeof target === 'string') { - var stats - try { - stats = fs.statSync(target) - } catch (e) { - return callback(e) - } - if (stats.isFile()) { - fileImporter(target, callback) - } else if (stats.isDirectory() && options.recursive) { - dirImporter(target, callback) - } else { - return callback(new Error('recursive must be true to add a directory')) - } - } - - function fileImporter (path, callback) { - const stats = fs.statSync(path) - if (stats.size > CHUNK_SIZE) { - const links = [] // { Hash: , Size: , Name: } - fs.createReadStream(path) - .pipe(new FixedSizeChunker(CHUNK_SIZE)) - .pipe(through2((chunk, enc, cb) => { - // TODO: check if this is right (I believe it should be type 'raw' - // https://github.com/ipfs/go-ipfs/issues/2331 - const raw = new UnixFS('file', chunk) - - const node = new 
mDAG.DAGNode(raw.marshal()) - - dagService.add(node, function (err) { - if (err) { - return log.err(err) - } - links.push({ - Hash: node.multihash(), - Size: node.size(), - leafSize: raw.fileSize(), - Name: '' - }) - cb() - }) - }, (cb) => { - const file = new UnixFS('file') - const parentNode = new mDAG.DAGNode() - links.forEach((l) => { - file.addBlockSize(l.leafSize) - const link = new mDAG.DAGLink(l.Name, l.Size, l.Hash) - parentNode.addRawLink(link) - }) - - parentNode.data = file.marshal() - dagService.add(parentNode, (err) => { - if (err) { - return log.err(err) - } - - const pathSplit = path.split('/') - const fileName = pathSplit[pathSplit.length - 1] +util.inherits(Importer, EE2) - callback(null, { - Hash: parentNode.multihash(), - Size: parentNode.size(), - Name: fileName - }) && cb() - }) - })) - } else { - // create just one file node with the data directly - var buf = fs.readFileSync(path) - const fileUnixFS = new UnixFS('file', buf) - const fileNode = new mDAG.DAGNode(fileUnixFS.marshal()) - - dagService.add(fileNode, (err) => { - if (err) { - return log.err(err) - } - - const split = path.split('/') - const fileName = split[split.length - 1] +function Importer (dagService, options) { + if (!(this instanceof Importer)) { + return new Importer(dagService) + } - callback(null, { - Hash: fileNode.multihash(), - Size: fileNode.size(), - Name: fileName - }) - }) - } + if (!dagService) { + return new Error('must specify a dagService') } - function dirImporter (path, callback) { - const files = fs.readdirSync(path) - const dirUnixFS = new UnixFS('directory') - const dirNode = new mDAG.DAGNode() + const files = [] + var counter = 0 - if (files.length === 0) { - dirNode.data = dirUnixFS.marshal() - dagService.add(dirNode, (err) => { + this.add = (fl) => { + counter++ + if (!fl.stream) { + // 1. create the empty dir dag node + // 2. write it to the dag store + // 3. add to the files array {path: <>, hash: <>} + // 4. emit the path + hash + const d = new UnixFS('directory') + const n = new merkleDAG.DAGNode() + n.data = d.marshal() + dagService.add(n, (err) => { if (err) { - return callback(err) + return this.emit('err', 'Failed to store' + fl.path) + } + const el = { + path: fl.path, + multihash: n.multihash(), + size: n.size(), + dataSize: d.fileSize() } - const split = path.split('/') - const dirName = split[split.length - 1] - - callback(null, { - Hash: dirNode.multihash(), - Size: dirNode.size(), - Name: dirName - }) + files.push(el) + this.emit('file', el) }) return } - async.map( - files, - (file, cb) => { - const filePath = path + '/' + file - const stats = fs.statSync(filePath) - if (stats.isFile()) { - return fileImporter(filePath, cb) - } if (stats.isDirectory()) { - return dirImporter(filePath, cb) - } else { - return cb(new Error('Found a weird file' + path + file)) - } - }, - (err, results) => { - if (err) { - return callback(err) - } - results.forEach((result) => { - dirNode.addRawLink(new mDAG.DAGLink(result.Name, result.Size, result.Hash)) - }) + const leaves = [] - dirNode.data = dirUnixFS.marshal() + fl.stream + .pipe(fsc(CHUNK_SIZE)) + .pipe(through2((chunk, enc, cb) => { + // 1. create the unixfs merkledag node + // 2. 
add its hash and size to the leafs array - dagService.add(dirNode, (err) => { - if (err) { - return callback(err) - } + // TODO - Support really large files + // a) check if we already reach max chunks if yes + // a.1) create a parent node for all of the current leaves + // b.2) clean up the leaves array and add just the parent node - const split = path.split('/') - const dirName = split[split.length - 1] + const l = new UnixFS('file', chunk) + const n = new merkleDAG.DAGNode(l.marshal()) - callback(null, { - Hash: dirNode.multihash(), - Size: dirNode.size(), - Name: dirName - }) - }) - }) - } - function bufferImporter (buffer, callback) { - const links = [] // { Hash: , Size: , Name: } - if (buffer.length > CHUNK_SIZE) { - var fsc = new FixedSizeChunker(CHUNK_SIZE) - fsc.write(buffer) - fsc.end() - fsc.pipe(through2((chunk, enc, cb) => { - // TODO: check if this is right (I believe it should be type 'raw' - // https://github.com/ipfs/go-ipfs/issues/2331 - const raw = new UnixFS('file', chunk) - const node = new mDAG.DAGNode(raw.marshal()) - - dagService.add(node, function (err) { + dagService.add(n, function (err) { if (err) { - return log.err(err) + return this.emit('err', 'Failed to store chunk of' + fl.path) } - links.push({ - Hash: node.multihash(), - Size: node.size(), - leafSize: raw.fileSize(), + + leaves.push({ + Hash: n.multihash(), + Size: n.size(), + leafSize: l.fileSize(), Name: '' }) + cb() }) }, (cb) => { - const file = new UnixFS('file') - const parentNode = new mDAG.DAGNode() - links.forEach((l) => { - file.addBlockSize(l.leafSize) - const link = new mDAG.DAGLink(l.Name, l.Size, l.Hash) - parentNode.addRawLink(link) + if (leaves.length === 1) { + // 1. add to the files array {path: <>, hash: <>} + // 2. emit the path + hash + + const el = { + path: fl.path, + multihash: leaves[0].Hash, + size: leaves[0].Size, + dataSize: leaves[0].leafSize + } + + files.push(el) + this.emit('file', el) + return done(cb) + } + // 1. create a parent node and add all the leafs + // 2. add to the files array {path: <>, hash: <>} + // 3. 
emit the path + hash of the parent node + + const f = new UnixFS('file') + const n = new merkleDAG.DAGNode() + + leaves.forEach((leaf) => { + f.addBlockSize(leaf.leafSize) + const l = new merkleDAG.DAGLink(leaf.Name, leaf.Size, leaf.Hash) + n.addRawLink(l) }) - parentNode.data = file.marshal() - dagService.add(parentNode, (err) => { + + n.data = f.marshal() + dagService.add(n, (err) => { if (err) { - return log.err(err) + this.emit('err', 'Failed to store' + fl.path) + return cb() } - callback(null, { - Hash: parentNode.multihash(), - Size: parentNode.size() - }) && cb() + const el = { + path: fl.path, + multihash: n.multihash(), + size: n.size() + // dataSize: f.fileSize() + } + + files.push(el) + this.emit('file', el) + return done(cb) }) })) - } else { - // create just one file node with the data directly - const fileUnixFS = new UnixFS('file', buffer) - const fileNode = new mDAG.DAGNode(fileUnixFS.marshal()) + function done (cb) { + counter-- + cb() + } + } - dagService.add(fileNode, (err) => { - if (err) { - return log.err(err) + this.finish = () => { + // if (files.length === 1) { + // // The file was already emitted, nothing to do here + // return + // } + + if (counter > 0) { + return setTimeout(this.finish, 200) + } + + // file struct + // { + // path: // full path + // multihash: // multihash of the dagNode + // size: // cumulative size + // dataSize: // dagNode size + // } + + // 1) convert files to a tree + // for each path, split, add to a json tree and in the end the name of the + // file points to an object that is has a key multihash and respective value + // { foo: { bar: { baz.txt: }}} + // the stop condition is if the value is not an object + const fileTree = {} + + files.forEach((file) => { + let splitted = file.path.split('/') + if (splitted.length === 1) { + return // adding just one file + // fileTree[file.path] = bs58.encode(file.multihash).toString() + } + if (splitted[0] === '') { + splitted = splitted.slice(1) + } + var tmpTree = fileTree + + for (var i = 0; i < splitted.length; i++) { + if (!tmpTree[splitted[i]]) { + tmpTree[splitted[i]] = {} } + if (i === splitted.length - 1) { + tmpTree[splitted[i]] = file.multihash + } else { + tmpTree = tmpTree[splitted[i]] + } + } + }) - callback(null, { - Hash: fileNode.multihash(), - Size: fileNode.size() - }) + if (Object.keys(fileTree).length === 0) { + return // no dirs to be created + } + + // 2) create a index for multihash: { size, dataSize } so + // that we can fetch these when creating the merkle dag nodes + + const mhIndex = {} + + files.forEach((file) => { + mhIndex[bs58.encode(file.multihash)] = { + size: file.size, + dataSize: file.dataSize + } + }) + + // 3) expand leaves recursively + // create a dirNode + // Object.keys + // If the value is an Object + // create a dir Node + // Object.keys + // Once finished, add the result as a link to the dir node + // If the value is not an object + // add as a link to the dirNode + + function traverse (tree, base) { + const keys = Object.keys(tree) + let tmpTree = tree + keys.map((key) => { + if (typeof tmpTree[key] === 'object' && + !Buffer.isBuffer(tmpTree[key])) { + tmpTree[key] = traverse.call(this, tmpTree[key], base ? 
base + '/' + key : key) + } + }) + + // at this stage, all keys are multihashes + // create a dir node + // add all the multihashes as links + // return this new node multihash + + const d = new UnixFS('directory') + const n = new merkleDAG.DAGNode() + + keys.forEach((key) => { + const b58mh = bs58.encode(tmpTree[key]) + const l = new merkleDAG.DAGLink( + key, mhIndex[b58mh].size, tmpTree[key]) + n.addRawLink(l) }) + + n.data = d.marshal() + dagService.add(n, (err) => { + if (err) { + this.emit('err', 'failed to store dirNode') + } + }) + + if (!base) { + return + } + + const el = { + path: base, + multihash: n.multihash(), + size: n.size() + // dataSize: '' // f.fileSize() + } + + this.emit('file', el) + + mhIndex[bs58.encode(n.multihash())] = { size: n.size() } + return n.multihash() } + /* const rootHash = */ traverse.call(this, fileTree) + + // TODO + // Since we never shoot for adding multiple directions at the root level, the following might not be necessary, reserving it for later: + // + // if at the first level, there was only one key (most cases) + // do nothing, if there was many, emit a rootHash with '/' + // emit root hash as well (as '/') } - // function streamImporter (stream, callback) {} } diff --git a/test/browser.js b/test/browser.js index 575396f7..76a57ecd 100644 --- a/test/browser.js +++ b/test/browser.js @@ -1,7 +1,6 @@ /* eslint-env mocha */ 'use strict' -const tests = require('./buffer-test') const async = require('async') const store = require('idb-plus-blob-store') const _ = require('lodash') @@ -46,17 +45,9 @@ describe('IPFS data importing tests on the Browser', function () { }) // create the repo constant to be used in the import a small buffer test - const options = { - stores: { - keys: store, - config: store, - datastore: store, - // datastoreLegacy: needs https://github.com/ipfs/js-ipfs-repo/issues/6#issuecomment-164650642 - logs: store, - locks: store, - version: store - } - } - const repo = new IPFSRepo('ipfs', options) - tests(repo) + const repo = new IPFSRepo('ipfs', {stores: store}) + + require('./test-exporter')(repo) + require('./test-importer')(repo) + require('./test-fixed-size-chunker') }) diff --git a/test/buffer-test.js b/test/buffer-test.js deleted file mode 100644 index b057ff4d..00000000 --- a/test/buffer-test.js +++ /dev/null @@ -1,143 +0,0 @@ -/* eslint-env mocha */ -'use strict' - -const unixFSEngine = require('./../src') -const importer = unixFSEngine.importer -const exporter = unixFSEngine.exporter -const BlockService = require('ipfs-blocks').BlockService -const DAGService = require('ipfs-merkle-dag').DAGService -const DAGNode = require('ipfs-merkle-dag').DAGNode -const UnixFS = require('ipfs-unixfs') - -const expect = require('chai').expect - -const smallBuf = require('buffer!./test-data/200Bytes.txt') -const bigBuf = require('buffer!./test-data/1.2MiB.txt') -const bigBlock = require('buffer!./test-data/1.2MiB.txt.block') -const bigLink = require('buffer!./test-data/1.2MiB.txt.link-block0') -const marbuf = require('buffer!./test-data/200Bytes.txt.block') - -module.exports = function (repo) { - describe('layout: importer', function () { - it('import a small buffer', function (done) { - // this is just like "import a small file" - const bs = new BlockService(repo) - const ds = new DAGService(bs) - const buf = smallBuf - importer.import(buf, ds, function (err, stat) { - expect(err).to.not.exist - ds.get(stat.Hash, function (err, node) { - expect(err).to.not.exist - const smallDAGNode = new DAGNode() - smallDAGNode.unMarshal(marbuf) - 
expect(node.size()).to.equal(smallDAGNode.size()) - expect(node.multihash()).to.deep.equal(smallDAGNode.multihash()) - done() - }) - }) - }) - - it('import a big buffer', function (done) { - // this is just like "import a big file" - const buf = bigBuf - const bs = new BlockService(repo) - const ds = new DAGService(bs) - importer.import(buf, ds, function (err, stat) { - expect(err).to.not.exist - ds.get(stat.Hash, function (err, node) { - expect(err).to.not.exist - const bigDAGNode = new DAGNode() - bigDAGNode.unMarshal(bigBlock) - expect(node.size()).to.equal(bigDAGNode.size()) - expect(node.links).to.deep.equal(bigDAGNode.links) - - const nodeUnixFS = UnixFS.unmarshal(node.data) - const bigDAGNodeUnixFS = UnixFS.unmarshal(bigDAGNode.data) - expect(nodeUnixFS.type).to.equal(bigDAGNodeUnixFS.type) - expect(nodeUnixFS.data).to.deep.equal(bigDAGNodeUnixFS.data) - expect(nodeUnixFS.blockSizes).to.deep.equal(bigDAGNodeUnixFS.blockSizes) - expect(nodeUnixFS.fileSize()).to.equal(bigDAGNodeUnixFS.fileSize()) - - expect(node.data).to.deep.equal(bigDAGNode.data) - expect(node.multihash()).to.deep.equal(bigDAGNode.multihash()) - - ds.get(node.links[0].hash, function (err, node) { - expect(err).to.not.exist - const leaf = new DAGNode() - - const marbuf2 = bigLink - leaf.unMarshal(marbuf2) - expect(node.links).to.deep.equal(leaf.links) - expect(node.links.length).to.equal(0) - expect(leaf.links.length).to.equal(0) - expect(leaf.marshal()).to.deep.equal(marbuf2) - const nodeUnixFS = UnixFS.unmarshal(node.data) - const leafUnixFS = UnixFS.unmarshal(leaf.data) - expect(nodeUnixFS.type).to.equal(leafUnixFS.type) - expect(nodeUnixFS.fileSize()).to.equal(leafUnixFS.fileSize()) - expect(nodeUnixFS.data).to.deep.equal(leafUnixFS.data) - expect(nodeUnixFS.blockSizes).to.deep.equal(leafUnixFS.blockSizes) - expect(node.data).to.deep.equal(leaf.data) - expect(node.marshal()).to.deep.equal(leaf.marshal()) - done() - }) - }) - }) - }) - - it('export a file with no links', (done) => { - const hash = 'QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8' - const bs = new BlockService(repo) - const ds = new DAGService(bs) - const testExport = exporter(hash, ds) - testExport.on('file', (data) => { - ds.get(hash, (err, fetchedNode) => { - expect(err).to.not.exist - const unmarsh = UnixFS.unmarshal(fetchedNode.data) - expect(unmarsh.data).to.deep.equal(data.stream._readableState.buffer[0]) - done() - }) - }) - }) - - it('export a small file with links', (done) => { - const hash = 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q' - const bs = new BlockService(repo) - const ds = new DAGService(bs) - const testExport = exporter(hash, ds) - testExport.on('file', (data) => { - expect(data.stream).to.exist - done() - }) - }) - - it('export a large file > 5mb', (done) => { - const hash = 'QmRQgufjp9vLE8XK2LGKZSsPCFCF6e4iynCQtNB5X2HBKE' - const bs = new BlockService(repo) - const ds = new DAGService(bs) - const testExport = exporter(hash, ds) - testExport.on('file', (data) => { - expect(data.stream).to.exist - done() - }) - }) - - it('export a directory', (done) => { - const hash = 'QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN' - const bs = new BlockService(repo) - const ds = new DAGService(bs) - const testExport = exporter(hash, ds) - var fs = [] - testExport.on('file', (data) => { - fs.push(data) - }) - setTimeout(() => { - expect(fs[0].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/200Bytes.txt') - expect(fs[1].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/dir-another') - 
expect(fs[2].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/level-1/200Bytes.txt') - expect(fs[3].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/level-1/level-2') - done() - }, 1000) - }) - }) -} diff --git a/test/node.js b/test/node.js index 94d254b6..1d4a9edf 100644 --- a/test/node.js +++ b/test/node.js @@ -1,5 +1,4 @@ /* eslint-env mocha */ - 'use strict' const fs = require('fs') @@ -7,6 +6,8 @@ const ncp = require('ncp').ncp const rimraf = require('rimraf') const expect = require('chai').expect const path = require('path') +const IPFSRepo = require('ipfs-repo') +const fsbs = require('fs-blob-store') describe('core', () => { const repoExample = path.join(process.cwd(), '/test/repo-example') @@ -20,6 +21,21 @@ describe('core', () => { }) }) + before((done) => { + fs.stat(path.join(__dirname, '/test-data/dir-nested/dir-another'), (err, exists) => { + if (err) { + fs.mkdirSync(path.join(__dirname, '/test-data/dir-nested/dir-another')) + } + }) + + fs.stat(path.join(__dirname, '/test-data/dir-nested/level-1/level-2'), (err, exists) => { + if (err) { + fs.mkdirSync(path.join(__dirname, '/test-data/dir-nested/level-1/level-2')) + } + done() + }) + }) + after((done) => { rimraf(repoTests, (err) => { expect(err).to.equal(null) @@ -27,19 +43,8 @@ describe('core', () => { }) }) - const tests = fs.readdirSync(__dirname) - tests.filter((file) => { - if (file === 'index.js' || - file === 'browser.js' || - file === 'test-data' || - file === 'repo-example' || - file === 'buffer-test.js' || - file.indexOf('repo-tests') > -1) { - return false - } - - return true - }).forEach((file) => { - require('./' + file) - }) + const repo = new IPFSRepo(repoTests, {stores: fsbs}) + require('./test-exporter')(repo) + require('./test-importer')(repo) + require('./test-fixed-size-chunker') }) diff --git a/test/repo-example/blocks/122000e5/122000e508d684a83e258b5230e5791d6c35dc3c287dbcc8ea26bb3bcf3d7c4ad942.data b/test/repo-example/blocks/122000e5/122000e508d684a83e258b5230e5791d6c35dc3c287dbcc8ea26bb3bcf3d7c4ad942.data new file mode 100644 index 00000000..38a7ed3a Binary files /dev/null and b/test/repo-example/blocks/122000e5/122000e508d684a83e258b5230e5791d6c35dc3c287dbcc8ea26bb3bcf3d7c4ad942.data differ diff --git a/test/repo-example/blocks/1220039c/1220039c0842ef4f653b86630496e6d686b63271de581cedfb1de84bdea6f504ec12.data b/test/repo-example/blocks/1220039c/1220039c0842ef4f653b86630496e6d686b63271de581cedfb1de84bdea6f504ec12.data new file mode 100644 index 00000000..1d48c015 Binary files /dev/null and b/test/repo-example/blocks/1220039c/1220039c0842ef4f653b86630496e6d686b63271de581cedfb1de84bdea6f504ec12.data differ diff --git a/test/repo-example/blocks/12200975/12200975fdafa3ecdb026118837fe67a9ed6ed11ef5aacd61a516cddf519b1cb56e1.data b/test/repo-example/blocks/12200975/12200975fdafa3ecdb026118837fe67a9ed6ed11ef5aacd61a516cddf519b1cb56e1.data new file mode 100644 index 00000000..cbd601a6 Binary files /dev/null and b/test/repo-example/blocks/12200975/12200975fdafa3ecdb026118837fe67a9ed6ed11ef5aacd61a516cddf519b1cb56e1.data differ diff --git a/test/repo-example/blocks/12200d06/12200d06d4afb85a411662dc882c52c9c79e7422bec62c066f8215705880b6d3a29c.data b/test/repo-example/blocks/12200d06/12200d06d4afb85a411662dc882c52c9c79e7422bec62c066f8215705880b6d3a29c.data new file mode 100644 index 00000000..72674694 Binary files /dev/null and b/test/repo-example/blocks/12200d06/12200d06d4afb85a411662dc882c52c9c79e7422bec62c066f8215705880b6d3a29c.data differ diff --git 
a/test/repo-example/blocks/12200e72/12200e725b1743efb7d00acec61eaf7ba84fafc2a0443cd606301d8018bb79d7b41e.data b/test/repo-example/blocks/12200e72/12200e725b1743efb7d00acec61eaf7ba84fafc2a0443cd606301d8018bb79d7b41e.data new file mode 100644 index 00000000..f9810363 --- /dev/null +++ b/test/repo-example/blocks/12200e72/12200e725b1743efb7d00acec61eaf7ba84fafc2a0443cd606301d8018bb79d7b41e.data @@ -0,0 +1,4728 @@ + +����There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG.
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some�� \ No newline at end of file diff --git a/test/repo-example/blocks/1220141a/1220141a2aa747a6b67ff33e0f68be055ed1dde2f90350090a781b6bed84cf4ae810.data b/test/repo-example/blocks/1220141a/1220141a2aa747a6b67ff33e0f68be055ed1dde2f90350090a781b6bed84cf4ae810.data new file mode 100644 index 00000000..c3a2f685 Binary files /dev/null and b/test/repo-example/blocks/1220141a/1220141a2aa747a6b67ff33e0f68be055ed1dde2f90350090a781b6bed84cf4ae810.data differ diff --git a/test/repo-example/blocks/1220184e/1220184e2152a396caf16f41a1cc4bec30817fb2ab953ca645c57e83ac431cfc8a6a.data b/test/repo-example/blocks/1220184e/1220184e2152a396caf16f41a1cc4bec30817fb2ab953ca645c57e83ac431cfc8a6a.data new file mode 100644 index 00000000..4e910622 Binary files /dev/null and b/test/repo-example/blocks/1220184e/1220184e2152a396caf16f41a1cc4bec30817fb2ab953ca645c57e83ac431cfc8a6a.data differ diff --git a/test/repo-example/blocks/12201dab/12201dab1bf33e76651d0ae2f63dbf5b9e71e979b34c1cc7a19b8276d8c11966027c.data b/test/repo-example/blocks/12201dab/12201dab1bf33e76651d0ae2f63dbf5b9e71e979b34c1cc7a19b8276d8c11966027c.data new file mode 100644 index 00000000..0b520379 Binary files /dev/null and b/test/repo-example/blocks/12201dab/12201dab1bf33e76651d0ae2f63dbf5b9e71e979b34c1cc7a19b8276d8c11966027c.data differ diff --git a/test/repo-example/blocks/1220204e/1220204e693ccb04abd065623b9f2182eb3b9e398e8db8f5d4d9be789213c2b6a9aa.data b/test/repo-example/blocks/1220204e/1220204e693ccb04abd065623b9f2182eb3b9e398e8db8f5d4d9be789213c2b6a9aa.data new file mode 100644 index 00000000..a4027d46 Binary files /dev/null and b/test/repo-example/blocks/1220204e/1220204e693ccb04abd065623b9f2182eb3b9e398e8db8f5d4d9be789213c2b6a9aa.data differ diff --git a/test/repo-example/blocks/122023e3/122023e3cf165402916213caef9870f26b75881cf86c1e7c7204fa35b55917021aa7.data b/test/repo-example/blocks/122023e3/122023e3cf165402916213caef9870f26b75881cf86c1e7c7204fa35b55917021aa7.data new file mode 100644 index 00000000..9cda061b Binary files /dev/null and b/test/repo-example/blocks/122023e3/122023e3cf165402916213caef9870f26b75881cf86c1e7c7204fa35b55917021aa7.data differ diff --git a/test/repo-example/blocks/12202418/12202418e7fe47e72ca241ee347bb0afcf78845a2f2e1a9b8aa0a70403446013f817.data b/test/repo-example/blocks/12202418/12202418e7fe47e72ca241ee347bb0afcf78845a2f2e1a9b8aa0a70403446013f817.data new file mode 100644 index 00000000..a655cf83 --- /dev/null +++ b/test/repo-example/blocks/12202418/12202418e7fe47e72ca241ee347bb0afcf78845a2f2e1a9b8aa0a70403446013f817.data @@ -0,0 +1,5 @@ + +���wx���xM��{ +D���zH/&^�� ��RS���/��v,��R +�=��N���g~���pf1�\[�>�%��U�1�@Q���׀2&m6�q���Q؁��]��|���!�K E�~J ֕읝�o�j��b�n3�eT�)D+;s +컓��:Ty!c�3����\*���T7��E?[��Pv}��A+�c�x�~�e�� \ No newline at end of file diff --git a/test/repo-example/blocks/12202562/12202562b552a79e9ff1184ee0c7fa50f39fe3810564c6a261b20cd04205372941a6.data b/test/repo-example/blocks/12202562/12202562b552a79e9ff1184ee0c7fa50f39fe3810564c6a261b20cd04205372941a6.data new file mode 100644 index 00000000..b99ceb21 --- /dev/null +++ 
b/test/repo-example/blocks/12202562/12202562b552a79e9ff1184ee0c7fa50f39fe3810564c6a261b20cd04205372941a6.data @@ -0,0 +1,3 @@ +, +" ��`�u�>/2��l il�f��YB�'M%���bar� + \ No newline at end of file diff --git a/test/repo-example/blocks/1220259c/1220259cae55bae8fa6c5b8945839ac89a8a1fa03369f8f5c204913a2913905fad04.data b/test/repo-example/blocks/1220259c/1220259cae55bae8fa6c5b8945839ac89a8a1fa03369f8f5c204913a2913905fad04.data new file mode 100644 index 00000000..70641f44 --- /dev/null +++ b/test/repo-example/blocks/1220259c/1220259cae55bae8fa6c5b8945839ac89a8a1fa03369f8f5c204913a2913905fad04.data @@ -0,0 +1,5 @@ + +����wx���xM��{ +D���zH/&^�� ��RS���/��v,��R +�=��N���g~���pf1�\[�>�%��U�1�@Q���׀2&m6�q���Q؁��]��|���!�K E�~J ֕읝�o�j��b�n3�eT�)D+;s +컓��:Ty!c�3����\*���T7��E?[��Pv}��A+�c�x�~�e��� \ No newline at end of file diff --git a/test/repo-example/blocks/122028d0/122028d0abf61304b10a47837f9f33d87304d79d20d50b8b00a127b8a4fd18e9b237.data b/test/repo-example/blocks/122028d0/122028d0abf61304b10a47837f9f33d87304d79d20d50b8b00a127b8a4fd18e9b237.data new file mode 100644 index 00000000..e705b9b0 Binary files /dev/null and b/test/repo-example/blocks/122028d0/122028d0abf61304b10a47837f9f33d87304d79d20d50b8b00a127b8a4fd18e9b237.data differ diff --git a/test/repo-example/blocks/12202a59/12202a59b6b978159fe9c8457654d80cba54bcc9f5bd27eac8cf15f7a6ad271174ed.data b/test/repo-example/blocks/12202a59/12202a59b6b978159fe9c8457654d80cba54bcc9f5bd27eac8cf15f7a6ad271174ed.data new file mode 100644 index 00000000..00360cfb --- /dev/null +++ b/test/repo-example/blocks/12202a59/12202a59b6b978159fe9c8457654d80cba54bcc9f5bd27eac8cf15f7a6ad271174ed.data @@ -0,0 +1,3 @@ +4 +" ��U��Fr���n����b��⇾�?��|<� test-data��� + \ No newline at end of file diff --git a/test/repo-example/blocks/12202cb7/12202cb76f3bebf2a211cea2c40a935710ce084e5293cec018d315be10447b6b6b71.data b/test/repo-example/blocks/12202cb7/12202cb76f3bebf2a211cea2c40a935710ce084e5293cec018d315be10447b6b6b71.data new file mode 100644 index 00000000..81663143 Binary files /dev/null and b/test/repo-example/blocks/12202cb7/12202cb76f3bebf2a211cea2c40a935710ce084e5293cec018d315be10447b6b6b71.data differ diff --git a/test/repo-example/blocks/1220350a/1220350acf62bdbd344db9a98e4e7cf76710b329ede24582feef6db74baafe4f500d.data b/test/repo-example/blocks/1220350a/1220350acf62bdbd344db9a98e4e7cf76710b329ede24582feef6db74baafe4f500d.data new file mode 100644 index 00000000..ee87b15f --- /dev/null +++ b/test/repo-example/blocks/1220350a/1220350acf62bdbd344db9a98e4e7cf76710b329ede24582feef6db74baafe4f500d.data @@ -0,0 +1,1452 @@ + +����l systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. 
Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. 
Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. 
+ + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. 
Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. 
+Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+ + + +�� \ No newline at end of file diff --git a/test/repo-example/blocks/12203706/12203706326544f96d4bafa6ab0bd2a1efd89ba605b2ee188db80354e372f42637fd.data b/test/repo-example/blocks/12203706/12203706326544f96d4bafa6ab0bd2a1efd89ba605b2ee188db80354e372f42637fd.data new file mode 100644 index 00000000..bb713c56 Binary files /dev/null and b/test/repo-example/blocks/12203706/12203706326544f96d4bafa6ab0bd2a1efd89ba605b2ee188db80354e372f42637fd.data differ diff --git a/test/repo-example/blocks/12203b5c/12203b5c1250bc4f33b8a1ecb29363eba288017410ee5300cdb50615b0e2693edac6.data b/test/repo-example/blocks/12203b5c/12203b5c1250bc4f33b8a1ecb29363eba288017410ee5300cdb50615b0e2693edac6.data new file mode 100644 index 00000000..64ce0aeb Binary files /dev/null and b/test/repo-example/blocks/12203b5c/12203b5c1250bc4f33b8a1ecb29363eba288017410ee5300cdb50615b0e2693edac6.data differ diff --git a/test/repo-example/blocks/12203ff8/12203ff87b7c43c3fa04f0be34df5ae62f82914b235e731c897c33fd6f3c5f11406d.data b/test/repo-example/blocks/12203ff8/12203ff87b7c43c3fa04f0be34df5ae62f82914b235e731c897c33fd6f3c5f11406d.data new file mode 100644 index 00000000..46fecabf --- /dev/null +++ b/test/repo-example/blocks/12203ff8/12203ff87b7c43c3fa04f0be34df5ae62f82914b235e731c897c33fd6f3c5f11406d.data @@ -0,0 +1,2 @@ + +����Q������� \ No newline at end of file diff --git a/test/repo-example/blocks/122040f1/122040f13ab05f941ce841b8b28b7578a8bfa228153c370f81b4bc62ba0d66640555.data b/test/repo-example/blocks/122040f1/122040f13ab05f941ce841b8b28b7578a8bfa228153c370f81b4bc62ba0d66640555.data new file mode 100644 index 00000000..9f1e7af6 Binary files /dev/null and b/test/repo-example/blocks/122040f1/122040f13ab05f941ce841b8b28b7578a8bfa228153c370f81b4bc62ba0d66640555.data differ diff --git a/test/repo-example/blocks/122044fb/122044fbd45a9685c6b9d7f332982816e81aacdcfb0b7a742d7ce8d70fd3d62972b9.data b/test/repo-example/blocks/122044fb/122044fbd45a9685c6b9d7f332982816e81aacdcfb0b7a742d7ce8d70fd3d62972b9.data new file mode 100644 index 00000000..2a6dbb58 --- /dev/null +++ b/test/repo-example/blocks/122044fb/122044fbd45a9685c6b9d7f332982816e81aacdcfb0b7a742d7ce8d70fd3d62972b9.data @@ -0,0 +1,2 @@ + + ��L �� �� �� �� ��  \ No newline at end of file diff --git a/test/repo-example/blocks/12204849/12204849953dbcc44598f39816866c6d96355f33eaf1004d61ab1880fdeb3edde2f0.data b/test/repo-example/blocks/12204849/12204849953dbcc44598f39816866c6d96355f33eaf1004d61ab1880fdeb3edde2f0.data new file mode 100644 index 00000000..bfe6600f Binary files /dev/null and b/test/repo-example/blocks/12204849/12204849953dbcc44598f39816866c6d96355f33eaf1004d61ab1880fdeb3edde2f0.data differ diff --git a/test/repo-example/blocks/12204f7b/12204f7b3de7c738dd8c5eaeba868fa71f6e1cd8d9dae9eb43d7e562597f0b38a8dc.data b/test/repo-example/blocks/12204f7b/12204f7b3de7c738dd8c5eaeba868fa71f6e1cd8d9dae9eb43d7e562597f0b38a8dc.data new file mode 100644 index 00000000..74f62a02 --- /dev/null +++ b/test/repo-example/blocks/12204f7b/12204f7b3de7c738dd8c5eaeba868fa71f6e1cd8d9dae9eb43d7e562597f0b38a8dc.data @@ -0,0 +1,3 @@ + + +�'Q��#�� diff --git a/test/repo-example/blocks/122066df/122066df09f34f09cdb6c7c9f62dd5c8fa1895895ecfafc48898434b52285426ffc6.data b/test/repo-example/blocks/122066df/122066df09f34f09cdb6c7c9f62dd5c8fa1895895ecfafc48898434b52285426ffc6.data new file mode 100644 index 00000000..42c502e2 Binary files /dev/null and b/test/repo-example/blocks/122066df/122066df09f34f09cdb6c7c9f62dd5c8fa1895895ecfafc48898434b52285426ffc6.data differ diff --git 
a/test/repo-example/blocks/12206990/1220699077ec4b2a853f2c007964cc174ee772e6e9b20cc73d2039b6a950226f60e3.data b/test/repo-example/blocks/12206990/1220699077ec4b2a853f2c007964cc174ee772e6e9b20cc73d2039b6a950226f60e3.data new file mode 100644 index 00000000..a9c1c069 Binary files /dev/null and b/test/repo-example/blocks/12206990/1220699077ec4b2a853f2c007964cc174ee772e6e9b20cc73d2039b6a950226f60e3.data differ diff --git a/test/repo-example/blocks/12206bbb/12206bbba0768a844ab1194e5876dfb6ce4399b2ea87a4b718c9850db60faeb50105.data b/test/repo-example/blocks/12206bbb/12206bbba0768a844ab1194e5876dfb6ce4399b2ea87a4b718c9850db60faeb50105.data new file mode 100644 index 00000000..96566028 --- /dev/null +++ b/test/repo-example/blocks/12206bbb/12206bbba0768a844ab1194e5876dfb6ce4399b2ea87a4b718c9850db60faeb50105.data @@ -0,0 +1,6 @@ + +��5 +" $��G�,�A�4{���x�Z/.����D`� 200Bytes.txt�3 +" Y��9_)a���˹2�R�m�Ŗke�9�� dir-another0 +" Ty�5 ;_9Yf�q��F�Lhyl���/��level-1� +� \ No newline at end of file diff --git a/test/repo-example/blocks/12207248/12207248d65fb34acf915665eaeb29e2075fc63e678b0281c5b5dc9c36b199e6c051.data b/test/repo-example/blocks/12207248/12207248d65fb34acf915665eaeb29e2075fc63e678b0281c5b5dc9c36b199e6c051.data new file mode 100644 index 00000000..f4c039c2 Binary files /dev/null and b/test/repo-example/blocks/12207248/12207248d65fb34acf915665eaeb29e2075fc63e678b0281c5b5dc9c36b199e6c051.data differ diff --git a/test/repo-example/blocks/12207369/12207369ddf6b9bb22adc2b957eb3ca7f6a6f3477c85b665f21db50834091f902033.data b/test/repo-example/blocks/12207369/12207369ddf6b9bb22adc2b957eb3ca7f6a6f3477c85b665f21db50834091f902033.data new file mode 100644 index 00000000..f57749f0 Binary files /dev/null and b/test/repo-example/blocks/12207369/12207369ddf6b9bb22adc2b957eb3ca7f6a6f3477c85b665f21db50834091f902033.data differ diff --git a/test/repo-example/blocks/12207395/12207395ead6f5161ddf1dc041c8640cb41a635756afb5a66fe49e8675b80a1b49ef.data b/test/repo-example/blocks/12207395/12207395ead6f5161ddf1dc041c8640cb41a635756afb5a66fe49e8675b80a1b49ef.data new file mode 100644 index 00000000..5a3836e9 Binary files /dev/null and b/test/repo-example/blocks/12207395/12207395ead6f5161ddf1dc041c8640cb41a635756afb5a66fe49e8675b80a1b49ef.data differ diff --git a/test/repo-example/blocks/1220742e/1220742ede16a7d165f4b000e5f4094933b61d3d0b6ffb3cb6ec5b3eed32c0f2a38f.data b/test/repo-example/blocks/1220742e/1220742ede16a7d165f4b000e5f4094933b61d3d0b6ffb3cb6ec5b3eed32c0f2a38f.data new file mode 100644 index 00000000..e845c839 Binary files /dev/null and b/test/repo-example/blocks/1220742e/1220742ede16a7d165f4b000e5f4094933b61d3d0b6ffb3cb6ec5b3eed32c0f2a38f.data differ diff --git a/test/repo-example/blocks/1220751e/1220751e71f050b51a74159c51829b210f326e73bc4f0a712e1f636625a8515426d8.data b/test/repo-example/blocks/1220751e/1220751e71f050b51a74159c51829b210f326e73bc4f0a712e1f636625a8515426d8.data new file mode 100644 index 00000000..44403205 --- /dev/null +++ b/test/repo-example/blocks/1220751e/1220751e71f050b51a74159c51829b210f326e73bc4f0a712e1f636625a8515426d8.data @@ -0,0 +1,3 @@ + + +x\�΃�� diff --git a/test/repo-example/blocks/1220783d/1220783d550428fcd841a9579a08e0d10619a4238b6acdc73c8cf6932120e1f6e2df.data b/test/repo-example/blocks/1220783d/1220783d550428fcd841a9579a08e0d10619a4238b6acdc73c8cf6932120e1f6e2df.data new file mode 100644 index 00000000..8c345f38 --- /dev/null +++ b/test/repo-example/blocks/1220783d/1220783d550428fcd841a9579a08e0d10619a4238b6acdc73c8cf6932120e1f6e2df.data @@ -0,0 +1,4732 @@ + +���� systems have 
seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. 
What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. 
Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings¹, no general file-system has emerged that offers global, low-latency, and decentralized distribution.

¹ For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.

Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing new functionality without degrading user experience.

Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.

Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput-oriented file systems, and how it might upgrade the Web itself.

This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.
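To make the content-addressed Merkle DAG idea concrete, the following is a minimal, illustrative sketch only: the node and link shapes, the choice of SHA-256, and the JSON serialization are assumptions made for brevity, not the actual IPFS object format or API. The point it shows is simply that a node is addressed by the hash of its contents, so identical data resolves to the same address anywhere and links are tamper-evident.

// Hypothetical sketch of a content-addressed Merkle DAG node (TypeScript).
import { createHash } from 'crypto'

interface DAGLink { name: string; size: number; hash: string }
interface DAGNode { data: Buffer; links: DAGLink[] }

// Serialize a node deterministically so it can be hashed.
function serialize (node: DAGNode): Buffer {
  return Buffer.from(JSON.stringify({
    data: node.data.toString('base64'),
    links: node.links
  }))
}

// A node's address is the hash of its serialized contents.
function address (node: DAGNode): string {
  return createHash('sha256').update(serialize(node)).digest('hex')
}

// Example: a "file" node linking two content-addressed chunks.
const chunkA: DAGNode = { data: Buffer.from('hello '), links: [] }
const chunkB: DAGNode = { data: Buffer.from('world'), links: [] }
const file: DAGNode = {
  data: Buffer.alloc(0),
  links: [
    { name: '', size: chunkA.data.length, hash: address(chunkA) },
    { name: '', size: chunkB.data.length, hash: address(chunkB) }
  ]
}

console.log(address(file)) // same content yields the same address on any host

Because every link embeds the hash of its target, any peer can verify fetched blocks independently of who served them, which is what enables the file distribution strategies described above.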
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. 
Among th
\ No newline at end of file
diff --git a/test/repo-example/blocks/1220797b/1220797bea239eddf7c11a47143062975b00b162ab286a019a0db2da5abfb67e516e.data b/test/repo-example/blocks/1220797b/1220797bea239eddf7c11a47143062975b00b162ab286a019a0db2da5abfb67e516e.data
new file mode 100644
index 00000000..4741988d
--- /dev/null
+++ b/test/repo-example/blocks/1220797b/1220797bea239eddf7c11a47143062975b00b162ab286a019a0db2da5abfb67e516e.data
@@ -0,0 +1,4729 @@
+
+e academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon.
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
\ No newline at end of file
diff --git a/test/repo-example/blocks/12207a19/12207a19de7e4b4d9e5db771aeca6fff1cc9f6518a7375f1671c61b9219f689c3851.data b/test/repo-example/blocks/12207a19/12207a19de7e4b4d9e5db771aeca6fff1cc9f6518a7375f1671c61b9219f689c3851.data
new file mode 100644
index 00000000..6d043733
--- /dev/null
+++ b/test/repo-example/blocks/12207a19/12207a19de7e4b4d9e5db771aeca6fff1cc9f6518a7375f1671c61b9219f689c3851.data
@@ -0,0 +1,2 @@
+
+r�ː�'Q��#��
\ No newline at end of file
diff --git a/test/repo-example/blocks/12207f93/12207f93b47af638189785c73d414d653468aedcdccff814e89ac755a5502e7041e5.data b/test/repo-example/blocks/12207f93/12207f93b47af638189785c73d414d653468aedcdccff814e89ac755a5502e7041e5.data
new file mode 100644
index 00000000..be380799
--- /dev/null
+++ b/test/repo-example/blocks/12207f93/12207f93b47af638189785c73d414d653468aedcdccff814e89ac755a5502e7041e5.data
@@ -0,0 +1,4730 @@
+
+����[7, ?] have not attained the same success. Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings1, no general file-system has emerged that offers global, low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.
+Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations.
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
\ No newline at end of file
diff --git a/test/repo-example/blocks/1220853a/1220853a6e9a61a6943c06d8c59d590cdae42b700677790812ce68f1eb2389e6ea23.data b/test/repo-example/blocks/1220853a/1220853a6e9a61a6943c06d8c59d590cdae42b700677790812ce68f1eb2389e6ea23.data
new file mode 100644
index 00000000..871a6bf0
--- /dev/null
+++ b/test/repo-example/blocks/1220853a/1220853a6e9a61a6943c06d8c59d590cdae42b700677790812ce68f1eb2389e6ea23.data
@@ -0,0 +1,4729 @@
+There have been many attempts at constructing a global distributed file system. Some systems have seen
+significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded
+widely and is still in use today. Others [7, ?] have not attained the same success. Outside of academia,
+the most successful systems have been peer-to-peer file-sharing applications primarily geared toward
+large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file
+distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a
+massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater
+numbers of users and files distributed than their academic file system counterparts. However, the
+applications were not designed as infrastructure to be built upon. While there have been successful
+repurposings¹, no general file system has emerged that offers global, low-latency, and decentralized
+distribution.
+¹For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it
+to distribute video game content.
+Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is
+the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had
+enormous technical and social impact. It has become the de facto way to transmit files across the
+internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented
+in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given
+the number of backwards compatibility constraints and the number of strong parties invested in the
+current model. But from another perspective, new protocols have emerged and gained wide use since the
+emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because moving small files around is relatively
+cheap, even for small organizations with lots of traffic.
+But we are entering a new era of data distribution with new challenges: (a) hosting and distributing
+petabyte datasets, (b) computing on large data across organizations, (c) high-volume high-definition
+on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing
+accidental disappearance of important files, and more. Many of these can be boiled down to “lots of
+data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already
+given up HTTP for different data distribution protocols. The next step is making them part of the Web
+itself.
+Orthogonal to efficient data distribution, version control systems have managed to develop important
+data collaboration workflows. Git, the distributed source code version control system, developed many
+useful ways to model and implement distributed data operations. The Git toolchain offers versatile
+versioning functionality that large file distribution systems severely lack. New solutions inspired by
+Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data
+collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem
+design [9], as its content-addressed Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data structure can influence the design of
+high-throughput-oriented file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile
+these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused
+integration yields a system greater than the sum of its parts. The central IPFS principle is modeling
+all data as part of the same Merkle DAG.
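To make the content-addressing idea concrete, the sketch below shows, in plain Node.js, how a Merkle DAG
node can be addressed by hashing its own data together with the addresses of the nodes it links to, so
that identical content always resolves to the identical address. This is a minimal illustration only:
the node shape and the address() helper are assumptions made for this example, not the block format or
API that IPFS itself uses.

// Minimal content-addressing sketch (illustration only, not the IPFS wire format).
// A node's address is the SHA-256 of its serialized form: its own data plus the
// addresses of its links.
const crypto = require('crypto')

function address (node) {
  // Serialize deterministically: the node's data first, then each link's address.
  const payload = Buffer.concat([
    Buffer.from(node.data),
    ...node.links.map((link) => Buffer.from(link, 'hex'))
  ])
  return crypto.createHash('sha256').update(payload).digest('hex')
}

// Two leaf chunks and a parent node that links to them by address.
const chunkA = { data: 'hello ', links: [] }
const chunkB = { data: 'world', links: [] }
const parent = { data: '', links: [address(chunkA), address(chunkB)] }

console.log(address(parent))

Changing a single byte in either chunk changes that chunk's address, which changes the parent's payload
and therefore the parent's address as well; this is the property that lets a peer verify any block it
receives against the address it asked for, and lets any peer serve it.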
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
There have been many attempts at constructing a global distributed file sy�� \ No newline at end of file diff --git a/test/repo-example/blocks/12208ef4/12208ef4750c671599c429b6d952b4a8c5d2d761827c119bd382efdcb0773ec435.data b/test/repo-example/blocks/12208ef4/12208ef4750c671599c429b6d952b4a8c5d2d761827c119bd382efdcb0773ec435.data new file mode 100644 index 00000000..7c40850f Binary files /dev/null and b/test/repo-example/blocks/12208ef4/12208ef4750c671599c429b6d952b4a8c5d2d761827c119bd382efdcb0773ec435.data differ diff --git a/test/repo-example/blocks/1220941f/1220941f8cb777c014ea4f229b48038414fea4bfe9523e9c06d15b4d731d30d6d624.data b/test/repo-example/blocks/1220941f/1220941f8cb777c014ea4f229b48038414fea4bfe9523e9c06d15b4d731d30d6d624.data new file mode 100644 index 00000000..637f391c --- /dev/null +++ b/test/repo-example/blocks/1220941f/1220941f8cb777c014ea4f229b48038414fea4bfe9523e9c06d15b4d731d30d6d624.data @@ -0,0 +1,2 @@ + +����x\�΃�� \ No newline at end of file diff --git a/test/repo-example/blocks/122095a8/122095a86a3b176d88282a3c56b77118e0986ff4784eb0c864e21c306dc6a632d5b9.data b/test/repo-example/blocks/122095a8/122095a86a3b176d88282a3c56b77118e0986ff4784eb0c864e21c306dc6a632d5b9.data new file mode 100644 index 00000000..ca141be2 Binary files /dev/null and b/test/repo-example/blocks/122095a8/122095a86a3b176d88282a3c56b77118e0986ff4784eb0c864e21c306dc6a632d5b9.data differ diff --git a/test/repo-example/blocks/12209c37/12209c376abcbaf28dbb16e8a44f24043a4d67324293d32caac64fd374cf73f3194a.data b/test/repo-example/blocks/12209c37/12209c376abcbaf28dbb16e8a44f24043a4d67324293d32caac64fd374cf73f3194a.data new file mode 100644 index 00000000..562529a2 Binary files /dev/null and b/test/repo-example/blocks/12209c37/12209c376abcbaf28dbb16e8a44f24043a4d67324293d32caac64fd374cf73f3194a.data differ diff --git a/test/repo-example/blocks/12209e8e/12209e8eabd807dd930cb0606fb18010e98b2469efc12f55077b1d64e485c8e81255.data b/test/repo-example/blocks/12209e8e/12209e8eabd807dd930cb0606fb18010e98b2469efc12f55077b1d64e485c8e81255.data new file mode 100644 index 00000000..725a9b22 --- /dev/null +++ b/test/repo-example/blocks/12209e8e/12209e8eabd807dd930cb0606fb18010e98b2469efc12f55077b1d64e485c8e81255.data @@ -0,0 +1,5 @@ + +@:4 +" si����"�¹W�<����G|��e��4 � 3 +1.2MiB.txt��L +: \ No newline at end of file diff --git a/test/repo-example/blocks/1220a89a/1220a89aa29de372b8989fe4ce0843acd69557211293a56bb0a1c7218bbc2050a5d1.data b/test/repo-example/blocks/1220a89a/1220a89aa29de372b8989fe4ce0843acd69557211293a56bb0a1c7218bbc2050a5d1.data new file mode 100644 index
00000000..69e8f9e4 --- /dev/null +++ b/test/repo-example/blocks/1220a89a/1220a89aa29de372b8989fe4ce0843acd69557211293a56bb0a1c7218bbc2050a5d1.data @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/test/repo-example/blocks/1220aebc/1220aebc7f9014417e6f5e2f2d9330882b8995f1ee71e3cd1f18e003781983ae8169.data b/test/repo-example/blocks/1220aebc/1220aebc7f9014417e6f5e2f2d9330882b8995f1ee71e3cd1f18e003781983ae8169.data new file mode 100644 index 00000000..1067edb4 --- /dev/null +++ b/test/repo-example/blocks/1220aebc/1220aebc7f9014417e6f5e2f2d9330882b8995f1ee71e3cd1f18e003781983ae8169.data @@ -0,0 +1,6 @@ + +�� +���wx���xM��{ +D���zH/&^�� ��RS���/��v,��R +�=��N���g~���pf1�\[�>�%��U�1�@Q���׀2&m6�q���Q؁��]��|���!�K E�~J ֕읝�o�j��b�n3�eT�)D+;s +컓��:Ty!c�3����\*���T7��E?[��Pv}��A+�c�x�~�e��� \ No newline at end of file diff --git a/test/repo-example/blocks/1220aee7/1220aee755c2de4672ffe9ad07eb6eaff7f3eb62c5c119e287be963fedf07c193cbf.data b/test/repo-example/blocks/1220aee7/1220aee755c2de4672ffe9ad07eb6eaff7f3eb62c5c119e287be963fedf07c193cbf.data new file mode 100644 index 00000000..c57d7186 Binary files /dev/null and b/test/repo-example/blocks/1220aee7/1220aee755c2de4672ffe9ad07eb6eaff7f3eb62c5c119e287be963fedf07c193cbf.data differ diff --git a/test/repo-example/blocks/1220b106/1220b106e54902a7d812437110f5c1b8dd9dbc3758d306c6adc0fe7a04d5c8fd3d3c.data b/test/repo-example/blocks/1220b106/1220b106e54902a7d812437110f5c1b8dd9dbc3758d306c6adc0fe7a04d5c8fd3d3c.data new file mode 100644 index 00000000..ce734230 --- /dev/null +++ b/test/repo-example/blocks/1220b106/1220b106e54902a7d812437110f5c1b8dd9dbc3758d306c6adc0fe7a04d5c8fd3d3c.data @@ -0,0 +1,4 @@ +4 +" si����"�¹W�<����G|��e��4 � 3 +1.2MiB.txt��L + \ No newline at end of file diff --git a/test/repo-example/blocks/1220b2c2/1220b2c2d9773a8316428106953b22061a79b6a3f36b7abed609c13a6b06e13c716c.data b/test/repo-example/blocks/1220b2c2/1220b2c2d9773a8316428106953b22061a79b6a3f36b7abed609c13a6b06e13c716c.data new file mode 100644 index 00000000..df20559d Binary files /dev/null and b/test/repo-example/blocks/1220b2c2/1220b2c2d9773a8316428106953b22061a79b6a3f36b7abed609c13a6b06e13c716c.data differ diff --git a/test/repo-example/blocks/1220b42c/1220b42c0cb8915e874e6909edcd67aaa5dd0fba77fe8ee9262b8ffa6c02ab94d033.data b/test/repo-example/blocks/1220b42c/1220b42c0cb8915e874e6909edcd67aaa5dd0fba77fe8ee9262b8ffa6c02ab94d033.data new file mode 100644 index 00000000..6a0cbe82 --- /dev/null +++ b/test/repo-example/blocks/1220b42c/1220b42c0cb8915e874e6909edcd67aaa5dd0fba77fe8ee9262b8ffa6c02ab94d033.data @@ -0,0 +1,3 @@ + + +Q������� diff --git a/test/repo-example/blocks/1220bfcc/1220bfccda787baba32b59c78450ac3d20b633360b43992c77289f9ed46d843561e6.data b/test/repo-example/blocks/1220bfcc/1220bfccda787baba32b59c78450ac3d20b633360b43992c77289f9ed46d843561e6.data new file mode 100644 index 00000000..508cff2e Binary files /dev/null and b/test/repo-example/blocks/1220bfcc/1220bfccda787baba32b59c78450ac3d20b633360b43992c77289f9ed46d843561e6.data differ diff --git a/test/repo-example/blocks/1220c597/1220c59777258deac9d4b8e49e84daf2d449cd81ab2e8b7d2a89e2f5a02d114588a5.data b/test/repo-example/blocks/1220c597/1220c59777258deac9d4b8e49e84daf2d449cd81ab2e8b7d2a89e2f5a02d114588a5.data new file mode 100644 index 00000000..5a59204a --- /dev/null +++ b/test/repo-example/blocks/1220c597/1220c59777258deac9d4b8e49e84daf2d449cd81ab2e8b7d2a89e2f5a02d114588a5.data @@ -0,0 +1,2 @@ + +stem. 
Some�� \ No newline at end of file diff --git a/test/repo-example/blocks/1220c8af/1220c8af6ecba1e2daa242da154307928e671ed7744c5a1474f67722a3ccb0be2469.data b/test/repo-example/blocks/1220c8af/1220c8af6ecba1e2daa242da154307928e671ed7744c5a1474f67722a3ccb0be2469.data new file mode 100644 index 00000000..8e5a1d76 Binary files /dev/null and b/test/repo-example/blocks/1220c8af/1220c8af6ecba1e2daa242da154307928e671ed7744c5a1474f67722a3ccb0be2469.data differ diff --git a/test/repo-example/blocks/1220cadf/1220cadf66920934b4e5db9c7fcd8b387fcaf9842671daf799fa4e9ae994cfe3a9d9.data b/test/repo-example/blocks/1220cadf/1220cadf66920934b4e5db9c7fcd8b387fcaf9842671daf799fa4e9ae994cfe3a9d9.data new file mode 100644 index 00000000..c1f9899a Binary files /dev/null and b/test/repo-example/blocks/1220cadf/1220cadf66920934b4e5db9c7fcd8b387fcaf9842671daf799fa4e9ae994cfe3a9d9.data differ diff --git a/test/repo-example/blocks/1220cc1a/1220cc1a6b60658b47f7f67dff1a135eedd6ba4736a126b2cc04a38e7e3e1a243407.data b/test/repo-example/blocks/1220cc1a/1220cc1a6b60658b47f7f67dff1a135eedd6ba4736a126b2cc04a38e7e3e1a243407.data new file mode 100644 index 00000000..5b090964 Binary files /dev/null and b/test/repo-example/blocks/1220cc1a/1220cc1a6b60658b47f7f67dff1a135eedd6ba4736a126b2cc04a38e7e3e1a243407.data differ diff --git a/test/repo-example/blocks/1220cefe/1220cefeb7568c689275e79f9480d743d04ebbc98d140afc21126c08a3053ef24a8f.data b/test/repo-example/blocks/1220cefe/1220cefeb7568c689275e79f9480d743d04ebbc98d140afc21126c08a3053ef24a8f.data new file mode 100644 index 00000000..6b72d373 --- /dev/null +++ b/test/repo-example/blocks/1220cefe/1220cefeb7568c689275e79f9480d743d04ebbc98d140afc21126c08a3053ef24a8f.data @@ -0,0 +1,2 @@ + +u���� ��r[�� \ No newline at end of file diff --git a/test/repo-example/blocks/1220d313/1220d313d073066579f96f3f1a5fcc2775e98d319a203f238a389408140add211e41.data b/test/repo-example/blocks/1220d313/1220d313d073066579f96f3f1a5fcc2775e98d319a203f238a389408140add211e41.data new file mode 100644 index 00000000..4f882793 --- /dev/null +++ b/test/repo-example/blocks/1220d313/1220d313d073066579f96f3f1a5fcc2775e98d319a203f238a389408140add211e41.data @@ -0,0 +1,3 @@ + + +� ��r[�� diff --git a/test/repo-example/blocks/1220dfb8/1220dfb83d070291965675a9d7e9e7321b92e0aeb724606b42b5df689e6b547ccc21.data b/test/repo-example/blocks/1220dfb8/1220dfb83d070291965675a9d7e9e7321b92e0aeb724606b42b5df689e6b547ccc21.data new file mode 100644 index 00000000..b1df8c51 Binary files /dev/null and b/test/repo-example/blocks/1220dfb8/1220dfb83d070291965675a9d7e9e7321b92e0aeb724606b42b5df689e6b547ccc21.data differ diff --git a/test/repo-example/blocks/1220e213/1220e213c7180c2051bec353ab32c9a67cf9a32f33c4b560cf34d11bf50c4b6f9b1e.data b/test/repo-example/blocks/1220e213/1220e213c7180c2051bec353ab32c9a67cf9a32f33c4b560cf34d11bf50c4b6f9b1e.data new file mode 100644 index 00000000..b75d8023 Binary files /dev/null and b/test/repo-example/blocks/1220e213/1220e213c7180c2051bec353ab32c9a67cf9a32f33c4b560cf34d11bf50c4b6f9b1e.data differ diff --git a/test/repo-example/blocks/1220e482/1220e482600f1178538696dbe42801a86cb455fd3c20e0c172235e4c86e0a76c670f.data b/test/repo-example/blocks/1220e482/1220e482600f1178538696dbe42801a86cb455fd3c20e0c172235e4c86e0a76c670f.data new file mode 100644 index 00000000..f0b3a599 Binary files /dev/null and b/test/repo-example/blocks/1220e482/1220e482600f1178538696dbe42801a86cb455fd3c20e0c172235e4c86e0a76c670f.data differ diff --git 
a/test/repo-example/blocks/1220e93a/1220e93a39630237c5d8d370d0ad31c31e6a6b6ced9791fc2c9fe282fe25491c73d7.data b/test/repo-example/blocks/1220e93a/1220e93a39630237c5d8d370d0ad31c31e6a6b6ced9791fc2c9fe282fe25491c73d7.data new file mode 100644 index 00000000..4eb5d7bf --- /dev/null +++ b/test/repo-example/blocks/1220e93a/1220e93a39630237c5d8d370d0ad31c31e6a6b6ced9791fc2c9fe282fe25491c73d7.data @@ -0,0 +1,4 @@ + +A;5 +" $��G�,�A�4{���x�Z/.����D`� 200Bytes.txt� +; \ No newline at end of file diff --git a/test/repo-example/blocks/1220f145/1220f145e82f37fcd08875e13c15894d210860ff30faa3f540365dd01f7ab4b00dfb.data b/test/repo-example/blocks/1220f145/1220f145e82f37fcd08875e13c15894d210860ff30faa3f540365dd01f7ab4b00dfb.data new file mode 100644 index 00000000..c52fcda4 Binary files /dev/null and b/test/repo-example/blocks/1220f145/1220f145e82f37fcd08875e13c15894d210860ff30faa3f540365dd01f7ab4b00dfb.data differ diff --git a/test/repo-example/blocks/1220f7a0/1220f7a06300a26e3a858261f3d9a05aa65f57b3ad326df0643a6d9d08a01c0903d7.data b/test/repo-example/blocks/1220f7a0/1220f7a06300a26e3a858261f3d9a05aa65f57b3ad326df0643a6d9d08a01c0903d7.data new file mode 100644 index 00000000..a762644a Binary files /dev/null and b/test/repo-example/blocks/1220f7a0/1220f7a06300a26e3a858261f3d9a05aa65f57b3ad326df0643a6d9d08a01c0903d7.data differ diff --git a/test/repo-example/blocks/1220f8a0/1220f8a01860aa759f3e2f32aee26c0d0b696cc966dac959429127134d1d2592a7de.data b/test/repo-example/blocks/1220f8a0/1220f8a01860aa759f3e2f32aee26c0d0b696cc966dac959429127134d1d2592a7de.data new file mode 100644 index 00000000..5accb645 --- /dev/null +++ b/test/repo-example/blocks/1220f8a0/1220f8a01860aa759f3e2f32aee26c0d0b696cc966dac959429127134d1d2592a7de.data @@ -0,0 +1,3 @@ +5 +" $��G�,�A�4{���x�Z/.����D`� 200Bytes.txt� + \ No newline at end of file diff --git a/test/repo-example/blocks/1220ff70/1220ff700c84c919c855797d1b071dcf9e0ca68f385a3b93e3145e289bc555646f66.data b/test/repo-example/blocks/1220ff70/1220ff700c84c919c855797d1b071dcf9e0ca68f385a3b93e3145e289bc555646f66.data new file mode 100644 index 00000000..42f65bd9 Binary files /dev/null and b/test/repo-example/blocks/1220ff70/1220ff700c84c919c855797d1b071dcf9e0ca68f385a3b93e3145e289bc555646f66.data differ diff --git a/test/repo-example/datastore/CURRENT b/test/repo-example/datastore/CURRENT index 875cf233..6ba31a31 100644 --- a/test/repo-example/datastore/CURRENT +++ b/test/repo-example/datastore/CURRENT @@ -1 +1 @@ -MANIFEST-000007 +MANIFEST-000009 diff --git a/test/repo-example/datastore/LOG b/test/repo-example/datastore/LOG index 863b68fd..f5ffd612 100644 --- a/test/repo-example/datastore/LOG +++ b/test/repo-example/datastore/LOG @@ -1,10 +1,7 @@ -=============== Dec 10, 2015 (PST) =============== -07:50:02.056578 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed -07:50:02.057231 db@open opening -07:50:02.057312 journal@recovery F·1 -07:50:02.057514 journal@recovery recovering @3 -07:50:02.058921 mem@flush created L0@5 N·4 S·1KiB "/ip..\xf6\xe4\xa9,v5":"/pk..\xf6\xe4\xa9,v6" -07:50:02.059983 db@janitor F·4 G·0 -07:50:02.060001 db@open done T·2.755926ms -07:50:02.073183 db@close closing -07:50:02.073285 db@close done T·97.522µs +=============== Apr 22, 2016 (WEST) =============== +03:16:42.272495 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed +03:16:42.272857 db@open opening +03:16:42.275673 db@janitor F·4 G·0 +03:16:42.275700 db@open done 
T·2.831108ms +03:16:42.596938 db@close closing +03:16:42.597082 db@close done T·139.194µs diff --git a/test/repo-example/datastore/LOG.old b/test/repo-example/datastore/LOG.old index 708351e7..863b68fd 100644 --- a/test/repo-example/datastore/LOG.old +++ b/test/repo-example/datastore/LOG.old @@ -1,10 +1,10 @@ =============== Dec 10, 2015 (PST) =============== -07:49:57.048841 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed -07:49:57.049014 db@open opening -07:49:57.049066 journal@recovery F·1 -07:49:57.049233 journal@recovery recovering @1 -07:49:57.049693 mem@flush created L0@2 N·2 S·211B "/lo..oot,v2":"/lo..ins,v1" -07:49:57.050381 db@janitor F·3 G·0 -07:49:57.050397 db@open done T·1.375431ms -07:49:57.064580 db@close closing -07:49:57.064655 db@close done T·72.59µs +07:50:02.056578 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed +07:50:02.057231 db@open opening +07:50:02.057312 journal@recovery F·1 +07:50:02.057514 journal@recovery recovering @3 +07:50:02.058921 mem@flush created L0@5 N·4 S·1KiB "/ip..\xf6\xe4\xa9,v5":"/pk..\xf6\xe4\xa9,v6" +07:50:02.059983 db@janitor F·4 G·0 +07:50:02.060001 db@open done T·2.755926ms +07:50:02.073183 db@close closing +07:50:02.073285 db@close done T·97.522µs diff --git a/test/repo-example/datastore/MANIFEST-000007 b/test/repo-example/datastore/MANIFEST-000007 deleted file mode 100644 index 6af3b545..00000000 Binary files a/test/repo-example/datastore/MANIFEST-000007 and /dev/null differ diff --git a/test/repo-example/datastore/MANIFEST-000009 b/test/repo-example/datastore/MANIFEST-000009 new file mode 100644 index 00000000..a69b18a4 Binary files /dev/null and b/test/repo-example/datastore/MANIFEST-000009 differ diff --git a/test/test-data/foo-big/1.2MiB.txt b/test/test-data/foo-big/1.2MiB.txt new file mode 100644 index 00000000..6e306c55 Binary files /dev/null and b/test/test-data/foo-big/1.2MiB.txt differ diff --git a/test/test-data/foo/bar/200Bytes.txt b/test/test-data/foo/bar/200Bytes.txt new file mode 100644 index 00000000..d95023c7 --- /dev/null +++ b/test/test-data/foo/bar/200Bytes.txt @@ -0,0 +1,4 @@ +�wx���xM��{ +D���zH/&^�� ��RS���/��v,��R +�=��N���g~���pf1�\[�>�%��U�1�@Q���׀2&m6�q���Q؁��]��|���!�K E�~J ֕읝�o�j��b�n3�eT�)D+;s +컓��:Ty!c�3����\*���T7��E?[��Pv}��A+�c�x�~�e� \ No newline at end of file diff --git a/test/test-data/pam/1.2MiB.txt b/test/test-data/pam/1.2MiB.txt new file mode 100644 index 00000000..6e306c55 Binary files /dev/null and b/test/test-data/pam/1.2MiB.txt differ diff --git a/test/test-data/pam/pum/1.2MiB.txt b/test/test-data/pam/pum/1.2MiB.txt new file mode 100644 index 00000000..6e306c55 Binary files /dev/null and b/test/test-data/pam/pum/1.2MiB.txt differ diff --git a/test/test-data/pam/pum/200Bytes.txt b/test/test-data/pam/pum/200Bytes.txt new file mode 100644 index 00000000..d95023c7 --- /dev/null +++ b/test/test-data/pam/pum/200Bytes.txt @@ -0,0 +1,4 @@ +�wx���xM��{ +D���zH/&^�� ��RS���/��v,��R +�=��N���g~���pf1�\[�>�%��U�1�@Q���׀2&m6�q���Q؁��]��|���!�K E�~J ֕읝�o�j��b�n3�eT�)D+;s +컓��:Ty!c�3����\*���T7��E?[��Pv}��A+�c�x�~�e� \ No newline at end of file diff --git a/test/test-data/pim/1.2MiB.txt b/test/test-data/pim/1.2MiB.txt new file mode 100644 index 00000000..6e306c55 Binary files /dev/null and b/test/test-data/pim/1.2MiB.txt differ diff --git a/test/test-data/pim/200Bytes.txt b/test/test-data/pim/200Bytes.txt new file mode 100644 index 00000000..d95023c7 --- /dev/null +++ 
b/test/test-data/pim/200Bytes.txt @@ -0,0 +1,4 @@ +�wx���xM��{ +D���zH/&^�� ��RS���/��v,��R +�=��N���g~���pf1�\[�>�%��U�1�@Q���׀2&m6�q���Q؁��]��|���!�K E�~J ֕읝�o�j��b�n3�eT�)D+;s +컓��:Ty!c�3����\*���T7��E?[��Pv}��A+�c�x�~�e� \ No newline at end of file diff --git a/test/test-exporter.js b/test/test-exporter.js new file mode 100644 index 00000000..3cd0603e --- /dev/null +++ b/test/test-exporter.js @@ -0,0 +1,78 @@ +/* eslint-env mocha */ +'use strict' + +const unixFSEngine = require('./../src') +const exporter = unixFSEngine.exporter +const expect = require('chai').expect +const BlockService = require('ipfs-blocks').BlockService +const DAGService = require('ipfs-merkle-dag').DAGService +const UnixFS = require('ipfs-unixfs') + +let ds + +module.exports = function (repo) { + describe('exporter', function () { + before((done) => { + const bs = new BlockService(repo) + expect(bs).to.exist + ds = new DAGService(bs) + expect(ds).to.exist + done() + }) + + it('export a file with no links', (done) => { + const hash = 'QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8' + const bs = new BlockService(repo) + const ds = new DAGService(bs) + const testExport = exporter(hash, ds) + testExport.on('file', (data) => { + ds.get(hash, (err, fetchedNode) => { + expect(err).to.not.exist + const unmarsh = UnixFS.unmarshal(fetchedNode.data) + expect(unmarsh.data).to.deep.equal(data.stream._readableState.buffer[0]) + done() + }) + }) + }) + + it('export a small file with links', (done) => { + const hash = 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q' + const bs = new BlockService(repo) + const ds = new DAGService(bs) + const testExport = exporter(hash, ds) + testExport.on('file', (data) => { + expect(data.stream).to.exist + done() + }) + }) + + it('export a large file > 5mb', (done) => { + const hash = 'QmRQgufjp9vLE8XK2LGKZSsPCFCF6e4iynCQtNB5X2HBKE' + const bs = new BlockService(repo) + const ds = new DAGService(bs) + const testExport = exporter(hash, ds) + testExport.on('file', (data) => { + expect(data.stream).to.exist + done() + }) + }) + + it('export a directory', (done) => { + const hash = 'QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN' + const bs = new BlockService(repo) + const ds = new DAGService(bs) + const testExport = exporter(hash, ds) + var fsa = [] + testExport.on('file', (data) => { + fsa.push(data) + }) + setTimeout(() => { + expect(fsa[0].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/200Bytes.txt') + expect(fsa[1].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/dir-another') + expect(fsa[2].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/level-1/200Bytes.txt') + expect(fsa[3].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/level-1/level-2') + done() + }, 1000) + }) + }) +} diff --git a/test/test-fixed-size-chunker.js b/test/test-fixed-size-chunker.js index 2cc0ccac..516b3cb0 100644 --- a/test/test-fixed-size-chunker.js +++ b/test/test-fixed-size-chunker.js @@ -7,19 +7,12 @@ const expect = require('chai').expect const stringToStream = require('string-to-stream') const through = require('through2') const path = require('path') -const isNode = !global.window -let fileStream -if (isNode) { - fileStream = function () { - return fs.createReadStream(path.join(__dirname, '/test-data/1MiB.txt')) - } -} else { - const myFile = require('buffer!./test-data/1MiB.txt') - fileStream = function () { - return stringToStream(myFile) - } -} +const fileStream = () => stringToStream( + fs.readFileSync( + path.join(__dirname, '/test-data/1MiB.txt') + 
).toString('hex') +) describe('chunker: fixed size', function () { it('256 Bytes chunks', function (done) { diff --git a/test/test-importer.js b/test/test-importer.js new file mode 100644 index 00000000..0324b61f --- /dev/null +++ b/test/test-importer.js @@ -0,0 +1,205 @@ +/* eslint-env mocha */ +'use strict' + +const Importer = require('./../src').importer +const expect = require('chai').expect +const BlockService = require('ipfs-blocks').BlockService +const DAGService = require('ipfs-merkle-dag').DAGService +// const DAGNode = require('ipfs-merkle-dag').DAGNode +const bs58 = require('bs58') +const fs = require('fs') +// const UnixFS = require('ipfs-unixfs') +const path = require('path') +const streamifier = require('streamifier') + +let ds + +module.exports = function (repo) { + describe('importer', function () { + const bigFile = fs.readFileSync(path.join(__dirname, '/test-data/1.2MiB.txt')) + const smallFile = fs.readFileSync(path.join(__dirname, '/test-data/200Bytes.txt')) + + // const dirSmall = path.join(__dirname, '/test-data/dir-small') + // const dirBig = path.join(__dirname, '/test-data/dir-big') + // const dirNested = path.join(__dirname, '/test-data/dir-nested') + + before((done) => { + const bs = new BlockService(repo) + expect(bs).to.exist + ds = new DAGService(bs) + expect(ds).to.exist + done() + }) + + it('small file (smaller than a chunk)', (done) => { + const buffered = smallFile + const r = streamifier.createReadStream(buffered) + const i = new Importer(ds) + i.on('file', (file) => { + expect(file.path).to.equal('200Bytes.txt') + expect(bs58.encode(file.multihash)).to.equal('QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8') + expect(file.size).to.equal(211) + done() + }) + i.add({path: '200Bytes.txt', stream: r}) + i.finish() + }) + + it('small file (smaller than a chunk) inside a dir', (done) => { + const buffered = smallFile + const r = streamifier.createReadStream(buffered) + const i = new Importer(ds) + var counter = 0 + i.on('file', (file) => { + counter++ + if (file.path === 'foo/bar/200Bytes.txt') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8') + } + if (file.path === 'foo/bar') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('Qmf5BQbTUyUAvd6Ewct83GYGnE1F6btiC3acLhR8MDxgkD') + } + if (file.path === 'foo') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmQrb6KKWGo8w7zKfx2JksptY6wN7B2ysSBdKZr4xMU36d') + } + if (counter === 3) { + done() + } + }) + i.on('err', (err) => { + expect(err).to.not.exist + }) + i.add({path: 'foo/bar/200Bytes.txt', stream: r}) + i.finish() + }) + + it('file bigger than a single chunk', (done) => { + const buffered = bigFile + const r = streamifier.createReadStream(buffered) + const i = new Importer(ds) + i.on('file', (file) => { + expect(file.path).to.equal('1.2MiB.txt') + expect(bs58.encode(file.multihash)).to.equal('QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q') + expect(file.size).to.equal(1258318) + done() + }) + i.add({path: '1.2MiB.txt', stream: r}) + i.finish() + }) + + it('file bigger than a single chunk inside a dir', (done) => { + const buffered = bigFile + const r = streamifier.createReadStream(buffered) + const i = new Importer(ds) + var counter = 0 + i.on('file', (file) => { + counter++ + if (file.path === 'foo-big/1.2MiB.txt') { + expect(bs58.encode(file.multihash)).to.equal('QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q') + expect(file.size).to.equal(1258318) + } + if (file.path === 'foo-big') {
expect(bs58.encode(file.multihash)).to.equal('QmaFgyFJUP4fxFySJCddg2Pj6rpwSywopWk87VEVv52RSj') + expect(file.size).to.equal(1258376) + } + if (counter === 2) { + done() + } + }) + i.add({path: 'foo-big/1.2MiB.txt', stream: r}) + i.finish() + }) + + it.skip('file (that chunk number exceeds max links)', (done) => { + // TODO + }) + + it('empty directory', (done) => { + const i = new Importer(ds) + i.on('file', (file) => { + expect(file.path).to.equal('empty-dir') + expect(bs58.encode(file.multihash)).to.equal('QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn') + expect(file.size).to.equal(4) + done() + }) + i.add({path: 'empty-dir'}) + i.finish() + }) + + it('directory with files', (done) => { + const r1 = streamifier.createReadStream(smallFile) + const r2 = streamifier.createReadStream(bigFile) + + const i = new Importer(ds) + var counter = 0 + i.on('file', (file) => { + counter++ + if (file.path === 'pim/200Bytes.txt') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8') + } + if (file.path === 'pim/1.2MiB.txt') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q') + } + if (file.path === 'pim') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i') + } + if (counter === 3) { + done() + } + }) + i.on('err', (err) => { + expect(err).to.not.exist + }) + i.add({path: 'pim/200Bytes.txt', stream: r1}) + i.add({path: 'pim/1.2MiB.txt', stream: r2}) + i.finish() + }) + + it('nested directory (2 levels deep)', (done) => { + const r1 = streamifier.createReadStream(smallFile) + const r2 = streamifier.createReadStream(bigFile) + const r3 = streamifier.createReadStream(bigFile) + + const i = new Importer(ds) + var counter = 0 + i.on('file', (file) => { + counter++ + if (file.path === 'pam/pum/200Bytes.txt') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8') + } + if (file.path === 'pam/pum/1.2MiB.txt') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q') + } + if (file.path === 'pam/1.2MiB.txt') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q') + } + if (file.path === 'pam/pum') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i') + } + if (file.path === 'pam') { + expect(bs58.encode(file.multihash).toString()) + .to.equal('QmRgdtzNx1H1BPJqShdhvWZ2D4DA2HUgZJ3XLtoXei27Av') + } + if (counter === 5) { + done() + } + }) + i.on('err', (err) => { + expect(err).to.not.exist + }) + i.add({path: 'pam/pum/200Bytes.txt', stream: r1}) + i.add({path: 'pam/pum/1.2MiB.txt', stream: r2}) + i.add({path: 'pam/1.2MiB.txt', stream: r3}) + i.finish() + }) + }) +} diff --git a/test/test-unixfs-engine.js b/test/test-unixfs-engine.js deleted file mode 100644 index 4d563e9e..00000000 --- a/test/test-unixfs-engine.js +++ /dev/null @@ -1,332 +0,0 @@ -/* eslint-env mocha */ -'use strict' - -const unixFSEngine = require('./../src') -const importer = unixFSEngine.importer -const exporter = unixFSEngine.exporter -const expect = require('chai').expect -const IPFSRepo = require('ipfs-repo') -const BlockService = require('ipfs-blocks').BlockService -const DAGService = require('ipfs-merkle-dag').DAGService -const DAGNode = require('ipfs-merkle-dag').DAGNode -const fsBlobStore = require('fs-blob-store') -const bs58 = 
require('bs58') -const fs = require('fs') -const UnixFS = require('ipfs-unixfs') -const path = require('path') - -let ds - -describe('layout: importer', function () { - const big = path.join(__dirname, '/test-data/1.2MiB.txt') - const small = path.join(__dirname, '/test-data/200Bytes.txt') - const dirSmall = path.join(__dirname, '/test-data/dir-small') - const dirBig = path.join(__dirname, '/test-data/dir-big') - const dirNested = path.join(__dirname, '/test-data/dir-nested') - - // check to see if missing empty dirs need to be created - - fs.stat(path.join(__dirname, '/test-data/dir-nested/dir-another'), function (err, exists) { - if (err) { - fs.mkdir(path.join(__dirname, '/test-data/dir-nested/dir-another')) - } - }) - fs.stat(path.join(__dirname, '/test-data/dir-nested/level-1/level-2'), function (err, exists) { - if (err) { - fs.mkdir(path.join(__dirname, '/test-data/dir-nested/level-1/level-2')) - } - }) - - it('start dag service', function (done) { - const options = { - stores: { - keys: fsBlobStore, - config: fsBlobStore, - datastore: fsBlobStore, - // datastoreLegacy: needs https://github.com/ipfsBlobStore/js-ipfsBlobStore-repo/issues/6#issuecomment-164650642 - logs: fsBlobStore, - locks: fsBlobStore, - version: fsBlobStore - } - } - const repo = new IPFSRepo(process.env.IPFS_PATH, options) - const bs = new BlockService(repo) - ds = new DAGService(bs) - expect(bs).to.exist - expect(ds).to.exist - done() - }) - - it('import a bad path', (done) => { - importer.import('/foo/bar/quux/a!wofjaeiwojfoiew', ds, function (err, stat) { - expect(err).to.exist - done() - }) - }) - - it('import a small file', (done) => { - importer.import(small, ds, function (err, stat) { - expect(err).to.not.exist - ds.get(stat.Hash, (err, node) => { - expect(err).to.not.exist - const smallDAGNode = new DAGNode() - const buf = fs.readFileSync(small + '.block') - smallDAGNode.unMarshal(buf) - expect(node.size()).to.equal(smallDAGNode.size()) - expect(node.multihash()).to.deep.equal(smallDAGNode.multihash()) - done() - }) - }) - }) - - it('import a big file', (done) => { - importer.import(big, ds, function (err, stat) { - expect(err).to.not.exist - ds.get(stat.Hash, (err, node) => { - expect(err).to.not.exist - - const bigDAGNode = new DAGNode() - const buf = fs.readFileSync(big + '.block') - bigDAGNode.unMarshal(buf) - expect(node.size()).to.equal(bigDAGNode.size()) - expect(node.links).to.deep.equal(bigDAGNode.links) - - const nodeUnixFS = UnixFS.unmarshal(node.data) - const bigDAGNodeUnixFS = UnixFS.unmarshal(bigDAGNode.data) - expect(nodeUnixFS.type).to.equal(bigDAGNodeUnixFS.type) - expect(nodeUnixFS.data).to.deep.equal(bigDAGNodeUnixFS.data) - expect(nodeUnixFS.blockSizes).to.deep.equal(bigDAGNodeUnixFS.blockSizes) - expect(nodeUnixFS.fileSize()).to.equal(bigDAGNodeUnixFS.fileSize()) - - expect(node.data).to.deep.equal(bigDAGNode.data) - expect(node.multihash()).to.deep.equal(bigDAGNode.multihash()) - - ds.get(node.links[0].hash, (err, node) => { - expect(err).to.not.exist - const leaf = new DAGNode() - const buf2 = fs.readFileSync(big + '.link-block0') - leaf.unMarshal(buf2) - expect(node.links).to.deep.equal(leaf.links) - expect(node.links.length).to.equal(0) - expect(leaf.links.length).to.equal(0) - expect(leaf.marshal()).to.deep.equal(buf2) - const nodeUnixFS = UnixFS.unmarshal(node.data) - const leafUnixFS = UnixFS.unmarshal(leaf.data) - expect(nodeUnixFS.type).to.equal(leafUnixFS.type) - expect(nodeUnixFS.fileSize()).to.equal(leafUnixFS.fileSize()) - 
expect(nodeUnixFS.data).to.deep.equal(leafUnixFS.data) - expect(nodeUnixFS.blockSizes).to.deep.equal(leafUnixFS.blockSizes) - expect(node.data).to.deep.equal(leaf.data) - expect(node.marshal()).to.deep.equal(leaf.marshal()) - done() - }) - }) - }) - }) - - it('import a small directory', (done) => { - importer.import(dirSmall, ds, { - recursive: true - }, function (err, stats) { - expect(err).to.not.exist - - ds.get(stats.Hash, (err, node) => { - expect(err).to.not.exist - const dirSmallNode = new DAGNode() - const buf = fs.readFileSync(dirSmall + '.block') - dirSmallNode.unMarshal(buf) - expect(node.links).to.deep.equal(dirSmallNode.links) - - const nodeUnixFS = UnixFS.unmarshal(node.data) - const dirUnixFS = UnixFS.unmarshal(dirSmallNode.data) - - expect(nodeUnixFS.type).to.equal(dirUnixFS.type) - expect(nodeUnixFS.fileSize()).to.equal(dirUnixFS.fileSize()) - expect(nodeUnixFS.data).to.deep.equal(dirUnixFS.data) - expect(nodeUnixFS.blockSizes).to.deep.equal(dirUnixFS.blockSizes) - expect(node.data).to.deep.equal(dirSmallNode.data) - expect(node.marshal()).to.deep.equal(dirSmallNode.marshal()) - done() - }) - }) - }) - - it('import a big directory', (done) => { - importer.import(dirBig, ds, { - recursive: true - }, function (err, stats) { - expect(err).to.not.exist - - ds.get(stats.Hash, (err, node) => { - expect(err).to.not.exist - const dirNode = new DAGNode() - const buf = fs.readFileSync(dirBig + '.block') - dirNode.unMarshal(buf) - expect(node.links).to.deep.equal(dirNode.links) - - const nodeUnixFS = UnixFS.unmarshal(node.data) - const dirUnixFS = UnixFS.unmarshal(dirNode.data) - - expect(nodeUnixFS.type).to.equal(dirUnixFS.type) - expect(nodeUnixFS.fileSize()).to.equal(dirUnixFS.fileSize()) - expect(nodeUnixFS.data).to.deep.equal(dirUnixFS.data) - expect(nodeUnixFS.blockSizes).to.deep.equal(dirUnixFS.blockSizes) - expect(node.data).to.deep.equal(dirNode.data) - expect(node.marshal()).to.deep.equal(dirNode.marshal()) - done() - }) - }) - }) - - it('import a nested directory', (done) => { - importer.import(dirNested, ds, { - recursive: true - }, function (err, stats) { - expect(err).to.not.exist - expect(bs58.encode(stats.Hash).toString()).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN') - - ds.get(stats.Hash, (err, node) => { - expect(err).to.not.exist - expect(node.links.length).to.equal(3) - - const dirNode = new DAGNode() - const buf = fs.readFileSync(dirNested + '.block') - dirNode.unMarshal(buf) - expect(node.links).to.deep.equal(dirNode.links) - expect(node.data).to.deep.equal(dirNode.data) - done() - }) - }) - }) - - it('import a small buffer', (done) => { - // this is just like "import a small file" - const buf = fs.readFileSync(path.join(__dirname, '/test-data/200Bytes.txt')) - importer.import(buf, ds, function (err, stat) { - expect(err).to.not.exist - ds.get(stat.Hash, (err, node) => { - expect(err).to.not.exist - const smallDAGNode = new DAGNode() - const marbuf = fs.readFileSync(small + '.block') - smallDAGNode.unMarshal(marbuf) - expect(node.size()).to.equal(smallDAGNode.size()) - expect(node.multihash()).to.deep.equal(smallDAGNode.multihash()) - done() - }) - }) - }) - - it('import a big buffer', (done) => { - // this is just like "import a big file" - const buf = fs.readFileSync(path.join(__dirname, '/test-data/1.2MiB.txt')) - importer.import(buf, ds, function (err, stat) { - expect(err).to.not.exist - ds.get(stat.Hash, (err, node) => { - expect(err).to.not.exist - - const bigDAGNode = new DAGNode() - const marbuf = fs.readFileSync(big + '.block') - 
bigDAGNode.unMarshal(marbuf) - expect(node.size()).to.equal(bigDAGNode.size()) - expect(node.links).to.deep.equal(bigDAGNode.links) - - const nodeUnixFS = UnixFS.unmarshal(node.data) - const bigDAGNodeUnixFS = UnixFS.unmarshal(bigDAGNode.data) - expect(nodeUnixFS.type).to.equal(bigDAGNodeUnixFS.type) - expect(nodeUnixFS.data).to.deep.equal(bigDAGNodeUnixFS.data) - expect(nodeUnixFS.blockSizes).to.deep.equal(bigDAGNodeUnixFS.blockSizes) - expect(nodeUnixFS.fileSize()).to.equal(bigDAGNodeUnixFS.fileSize()) - - expect(node.data).to.deep.equal(bigDAGNode.data) - expect(node.multihash()).to.deep.equal(bigDAGNode.multihash()) - - ds.get(node.links[0].hash, (err, node) => { - expect(err).to.not.exist - const leaf = new DAGNode() - - const marbuf2 = fs.readFileSync(big + '.link-block0') - leaf.unMarshal(marbuf2) - expect(node.links).to.deep.equal(leaf.links) - expect(node.links.length).to.equal(0) - expect(leaf.links.length).to.equal(0) - expect(leaf.marshal()).to.deep.equal(marbuf2) - const nodeUnixFS = UnixFS.unmarshal(node.data) - const leafUnixFS = UnixFS.unmarshal(leaf.data) - expect(nodeUnixFS.type).to.equal(leafUnixFS.type) - expect(nodeUnixFS.fileSize()).to.equal(leafUnixFS.fileSize()) - expect(nodeUnixFS.data).to.deep.equal(leafUnixFS.data) - expect(nodeUnixFS.blockSizes).to.deep.equal(leafUnixFS.blockSizes) - expect(node.data).to.deep.equal(leaf.data) - expect(node.marshal()).to.deep.equal(leaf.marshal()) - done() - }) - }) - }) - }) - - it.skip('import from a readable stream', (done) => { - }) -}) - -describe('layout: exporter', function () { - it('export a file with no links', (done) => { - const hash = 'QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8' - const testExport = exporter(hash, ds) - testExport.on('file', (data) => { - ds.get(hash, (err, fetchedNode) => { - expect(err).to.not.exist - const unmarsh = UnixFS.unmarshal(fetchedNode.data) - expect(unmarsh.data).to.deep.equal(data.stream._readableState.buffer[0]) - done() - }) - }) - }) - - it('export a small file with links', (done) => { - const hash = 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q' - const testExport = exporter(hash, ds) - testExport.on('file', (data) => { - var ws = fs.createWriteStream(path.join(process.cwd(), '/test', data.path)) - data.stream.pipe(ws) - data.stream.on('end', () => { - const stats = fs.existsSync(path.join(process.cwd(), '/test', data.path)) - expect(stats).to.equal(true) - fs.unlinkSync(path.join(process.cwd(), '/test', data.path)) - done() - }) - }) - }) - - it('export a large file > 5mb', (done) => { - const hash = 'QmRQgufjp9vLE8XK2LGKZSsPCFCF6e4iynCQtNB5X2HBKE' - const testExport = exporter(hash, ds) - testExport.on('file', (data) => { - var ws = fs.createWriteStream(path.join(process.cwd(), '/test', data.path)) - data.stream.pipe(ws) - data.stream.on('end', () => { - const stats = fs.existsSync(path.join(process.cwd(), '/test', data.path)) - expect(stats).to.equal(true) - fs.unlinkSync(path.join(process.cwd(), '/test', data.path)) - done() - }) - }) - }) - - it('export a directory', (done) => { - const hash = 'QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN' - var testExport = exporter(hash, ds) - var fs = [] - var x = 0 - testExport.on('file', (data) => { - fs.push(data) - x++ - if (x === 4) { - expect(fs[0].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/200Bytes.txt') - expect(fs[1].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/dir-another') - expect(fs[2].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/level-1/200Bytes.txt') - 
expect(fs[3].path).to.equal('QmWChcSFMNcFkfeJtNd8Yru1rE6PhtCRfewi1tMwjkwKjN/level-1/level-2') - done() - } - }) - }) -})
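Taken together, the new test suites above sketch the intended usage of the rewritten engine: instead of the old callback-style importer.import(path, dagService, callback), both the importer and the exporter are now event emitters driven by a DAGService. The snippet below is a minimal usage sketch assembled only from the calls exercised in test-importer.js and test-exporter.js; the repo variable stands in for an already-configured ipfs-repo instance, and the input file path and the require path from the repository root are illustrative assumptions, not part of this diff.

// Minimal usage sketch. Assumptions: `repo` is a configured ipfs-repo instance,
// './src' is the require path from the repository root, and file paths are illustrative.
const fs = require('fs')
const bs58 = require('bs58')
const BlockService = require('ipfs-blocks').BlockService
const DAGService = require('ipfs-merkle-dag').DAGService
const unixFSEngine = require('./src')

const ds = new DAGService(new BlockService(repo))

// Importing: each stored file or directory is announced through a 'file' event
// carrying { path, multihash, size }, as asserted in test-importer.js.
const Importer = unixFSEngine.importer
const importer = new Importer(ds)
importer.on('file', (file) => {
  console.log(file.path, bs58.encode(file.multihash).toString(), file.size)
})
importer.on('err', (err) => {
  console.error(err)
})
importer.add({path: 'foo/200Bytes.txt', stream: fs.createReadStream('./200Bytes.txt')})
importer.finish()

// Exporting: given a base58 multihash (here the small-file hash asserted in the tests),
// the exporter emits one 'file' event per entry, each with a readable content stream.
const exporter = unixFSEngine.exporter
const testExport = exporter('QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8', ds)
testExport.on('file', (data) => {
  data.stream.pipe(process.stdout)
})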