Skip to content

Commit 4658cb0

Browse files
committed
Migration 8-9: migrate CIDsv1 to raw multihash
1 parent 012d6a8 commit 4658cb0

File tree

9 files changed

+2079
-2
lines changed

9 files changed

+2079
-2
lines changed

go.mod

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
11
module github.com/ipfs/fs-repo-migrations
22

33
go 1.13
4+
5+
replace github.com/ipfs/fs-repo-migrations/ipfs-8-to-9/migration => ./ipfs-8-to-9/migration
6+
7+
require (
8+
github.com/dgraph-io/badger v1.6.0
9+
github.com/ipfs/fs-repo-migrations/ipfs-8-to-9/migration v0.0.0-00010101000000-000000000000
10+
github.com/mitchellh/go-homedir v1.1.0
11+
golang.org/x/net v0.0.0-20190620200207-3b0461eec859
12+
)

go.sum

Lines changed: 835 additions & 0 deletions
Large diffs are not rendered by default.

ipfs-8-to-9/ipfs-8-to-9

13.3 MB
Binary file not shown.

ipfs-8-to-9/main.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package main
2+
3+
import (
4+
migrate "github.com/ipfs/fs-repo-migrations/go-migrate"
5+
mg8 "github.com/ipfs/fs-repo-migrations/ipfs-8-to-9/migration"
6+
)
7+
8+
func main() {
9+
m := mg8.Migration{}
10+
migrate.Main(&m)
11+
}

ipfs-8-to-9/migration/go.mod

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
module github.com/ipfs/fs-repo-migrations/ipfs-8-to-9/migration
2+
3+
go 1.13
4+
5+
require (
6+
github.com/hsanjuan/go-libp2p-http v0.0.2 // indirect
7+
github.com/ipfs/dir-index-html v1.0.3 // indirect
8+
github.com/ipfs/fs-repo-migrations v1.4.0
9+
github.com/ipfs/go-cid v0.0.4
10+
github.com/ipfs/go-datastore v0.3.1
11+
github.com/ipfs/go-ds-flatfs v0.3.0
12+
github.com/ipfs/go-ipfs v0.4.22-0.20200130064341-6750ee973e2a
13+
github.com/ipfs/go-ipfs-addr v0.0.1 // indirect
14+
github.com/ipfs/go-ipfs-ds-help v0.0.1
15+
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
16+
github.com/multiformats/go-multicodec v0.1.6 // indirect
17+
github.com/multiformats/go-multihash v0.0.10
18+
github.com/prometheus/tsdb v0.7.1 // indirect
19+
github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect
20+
golang.org/x/crypto v0.0.0-20200115085410-6d4e4cb37c7d // indirect
21+
)

ipfs-8-to-9/migration/go.sum

Lines changed: 922 additions & 0 deletions
Large diffs are not rendered by default.

ipfs-8-to-9/migration/migration.go

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
// Package mg8 contains the code to perform the 8-to-9 repository migration in
2+
// go-ipfs. This performs a switch to raw multihashes for all keys in the
3+
// go-ipfs datastore (https://github.com/ipfs/go-ipfs/issues/6815).
4+
package mg8
5+
6+
import (
7+
"errors"
8+
"fmt"
9+
10+
migrate "github.com/ipfs/fs-repo-migrations/go-migrate"
11+
lock "github.com/ipfs/fs-repo-migrations/ipfs-1-to-2/repolock"
12+
"github.com/ipfs/go-datastore/namespace"
13+
14+
mfsr "github.com/ipfs/fs-repo-migrations/mfsr"
15+
log "github.com/ipfs/fs-repo-migrations/stump"
16+
ds "github.com/ipfs/go-datastore"
17+
"github.com/ipfs/go-ipfs/plugin/loader"
18+
fsrepo "github.com/ipfs/go-ipfs/repo/fsrepo"
19+
)
20+
21+
// Migration implements the 8-to-9 repository migration described in the
// package comment. It is an empty struct and carries no state of its own.
22+
type Migration struct{}
23+
24+
// Versions returns the current version string for this migration.
25+
func (m Migration) Versions() string {
26+
return "8-to-9"
27+
}
28+
29+
// Reversible returns false. This migration cannot be reverted, as we do not
30+
// know which raw hashes were actually CIDv1s. However, things should work all
31+
// the same as they will be treated as CIDv0s in old versions anyways.
32+
func (m Migration) Reversible() bool {
33+
return false
34+
}
35+
36+
// Apply runs the migration.
37+
func (m Migration) Apply(opts migrate.Options) error {
38+
log.Verbose = opts.Verbose
39+
log.Log("applying %s repo migration", m.Versions())
40+
41+
log.VLog("locking repo at %q", opts.Path)
42+
lk, err := lock.Lock2(opts.Path)
43+
if err != nil {
44+
return err
45+
}
46+
defer lk.Close()
47+
48+
repo := mfsr.RepoPath(opts.Path)
49+
50+
log.VLog(" - verifying version is '8'")
51+
if err := repo.CheckVersion("8"); err != nil {
52+
return err
53+
}
54+
55+
log.VLog(" - loading repo configurations")
56+
plugins, err := loader.NewPluginLoader(opts.Path)
57+
if err != nil {
58+
return fmt.Errorf("error loading plugins: %s", err)
59+
}
60+
61+
if err := plugins.Initialize(); err != nil {
62+
return fmt.Errorf("error initializing plugins: %s", err)
63+
}
64+
65+
if err := plugins.Inject(); err != nil {
66+
return fmt.Errorf("error injecting plugins: %s", err)
67+
}
68+
69+
cfg, err := fsrepo.ConfigAt(opts.Path)
70+
if err != nil {
71+
return err
72+
}
73+
74+
dsc, err := fsrepo.AnyDatastoreConfig(cfg.Datastore.Spec)
75+
if err != nil {
76+
return err
77+
}
78+
79+
dstore, err := dsc.Create(opts.Path)
80+
if err != nil {
81+
return err
82+
}
83+
defer dstore.Close()
84+
85+
// TODO: assuming the user has not modified this
86+
blocks := namespace.Wrap(dstore, ds.NewKey("/blocks"))
87+
88+
log.VLog(" - starting CIDv1 to raw multihash block migration")
89+
cidSwapper := CidSwapper{blocks}
90+
total, err := cidSwapper.Run()
91+
if err != nil {
92+
log.Error(err)
93+
return err
94+
}
95+
96+
log.Log("%d CIDv1 keys swapped to raw multihashes", total)
97+
if err := repo.WriteVersion("9"); err != nil {
98+
log.Error("failed to write version file")
99+
return err
100+
}
101+
log.Log("updated version file")
102+
103+
return nil
104+
}
105+
106+
// Revert attempts to undo the migration.
107+
func (m Migration) Revert(opts migrate.Options) error {
108+
// TODO: Consider a no-op revert though
109+
return errors.New("This migration cannot be reverted")
110+
}

ipfs-8-to-9/migration/swapper.go

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
package mg8
2+
3+
import (
4+
"errors"
5+
"sync"
6+
"sync/atomic"
7+
8+
log "github.com/ipfs/fs-repo-migrations/stump"
9+
"github.com/ipfs/go-cid"
10+
"github.com/ipfs/go-datastore"
11+
ds "github.com/ipfs/go-datastore"
12+
"github.com/ipfs/go-datastore/query"
13+
dshelp "github.com/ipfs/go-ipfs-ds-help"
14+
"github.com/multiformats/go-multihash"
15+
)
16+
17+
// SyncSize specifies how many bytes of swapped block data a worker
// accumulates before it syncs the datastore. Increasing this number results
// in fewer Sync calls, at the cost of more data being written between two
// consecutive syncs.
var SyncSize uint64 = 10 * 1024 * 1024 // 10MiB
20+
21+
// NWorkers is the number of worker goroutines that Run starts to swap keys
// concurrently.
var NWorkers = 4
23+
24+
// CidSwapper reads all the keys in a datastore and replaces
25+
// them with their raw multihash.
26+
type CidSwapper struct {
27+
Store ds.Batching // the datastore to migrate.
28+
}
29+
30+
// Run lists all the keys in the datastore and triggers a swap operation for
31+
// those corresponding to CIDv1s (replacing them by their raw multihash).
32+
//
33+
// Run returns the total number of keys swapped.
34+
func (cswap *CidSwapper) Run() (uint64, error) {
35+
// Always perform a final sync
36+
defer cswap.Store.Sync(ds.NewKey("/"))
37+
// Query all keys. We will loop all keys
38+
// and swap those that can be parsed as CIDv1.
39+
queryAll := query.Query{
40+
KeysOnly: true,
41+
}
42+
43+
results, err := cswap.Store.Query(queryAll)
44+
if err != nil {
45+
return 0, err
46+
}
47+
defer results.Close()
48+
resultsCh := results.Next()
49+
50+
var total uint64
51+
var nErrors uint64
52+
var wg sync.WaitGroup
53+
wg.Add(NWorkers)
54+
for i := 0; i < NWorkers; i++ {
55+
go func() {
56+
defer wg.Done()
57+
n, e := cswap.swapWorker(resultsCh)
58+
atomic.AddUint64(&total, n)
59+
atomic.AddUint64(&nErrors, e)
60+
}()
61+
}
62+
wg.Wait()
63+
if nErrors > 0 {
64+
return total, errors.New("errors happened during the migration. Consider running it again")
65+
}
66+
67+
return total, nil
68+
}
69+
70+
// swapWorkers reads query results from a channel and renames CIDv1 keys to
71+
// raw multihashes by reading the blocks and storing them with the new
72+
// key. Returns the number of keys swapped and the number of errors.
73+
func (cswap *CidSwapper) swapWorker(resultsCh <-chan query.Result) (uint64, uint64) {
74+
var swapped uint64
75+
var errored uint64
76+
var curSyncSize uint64
77+
78+
// Process keys from the results channel
79+
for res := range resultsCh {
80+
if res.Error != nil {
81+
log.Error(res.Error)
82+
errored++
83+
continue
84+
}
85+
86+
oldKey := ds.NewKey(res.Key)
87+
c, err := dsKeyToCid(oldKey)
88+
if err != nil {
89+
// complain if we find anything that is not a CID but
90+
// leave it as it is.
91+
log.Log("could not parse %s as a Cid", oldKey)
92+
continue
93+
}
94+
if c.Version() == 0 { // CidV0 are multihashes, leave them.
95+
continue
96+
}
97+
98+
// Cid Version > 0
99+
newKey := multihashToDsKey(c.Hash())
100+
size, err := cswap.swap(oldKey, newKey)
101+
if err != nil {
102+
log.Error("swapping %s for %s: %s", oldKey, newKey, err)
103+
errored++
104+
continue
105+
}
106+
swapped++
107+
curSyncSize += size
108+
109+
// Commit and Sync if we reached SyncSize
110+
if curSyncSize >= SyncSize {
111+
curSyncSize = 0
112+
err = cswap.Store.Sync(ds.NewKey("/"))
113+
if err != nil {
114+
log.Error(err)
115+
errored++
116+
continue
117+
}
118+
}
119+
}
120+
return swapped, errored
121+
}
122+
123+
// swap swaps the old for the new key in a single transaction.
124+
func (cswap *CidSwapper) swap(old, new ds.Key) (uint64, error) {
125+
// Unfortunately grouping multiple swaps in larger batches usually
126+
// results in "too many open files" errors in flatfs (many really
127+
// small files) . So we do small batches instead and Sync at larger
128+
// intervals.
129+
// Note flatfs will not clear up the batch after committing, so
130+
// the object cannot be-reused.
131+
batcher, err := cswap.Store.Batch()
132+
if err != nil {
133+
return 0, err
134+
}
135+
136+
v, err := cswap.Store.Get(old)
137+
vLen := uint64(len(v))
138+
if err != nil {
139+
return vLen, err
140+
}
141+
if err := batcher.Put(new, v); err != nil {
142+
return vLen, err
143+
}
144+
if err := batcher.Delete(old); err != nil {
145+
return vLen, err
146+
}
147+
return vLen, batcher.Commit()
148+
}
149+
150+
// Copied from go-ipfs-ds-help as that one is gone.
151+
func dsKeyToCid(dsKey datastore.Key) (cid.Cid, error) {
152+
kb, err := dshelp.BinaryFromDsKey(dsKey)
153+
if err != nil {
154+
return cid.Cid{}, err
155+
}
156+
return cid.Cast(kb)
157+
}
158+
159+
// multihashToDsKey creates a Key from the given Multihash.
160+
// here to avoid dependency on newer dshelp function.
161+
// TODO: can be removed if https://github.com/ipfs/go-ipfs-ds-help/pull/18
162+
// is merged.
163+
func multihashToDsKey(k multihash.Multihash) datastore.Key {
164+
return dshelp.NewKeyFromBinary(k)
165+
}

main.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,18 @@ import (
1010
gomigrate "github.com/ipfs/fs-repo-migrations/go-migrate"
1111
mg0 "github.com/ipfs/fs-repo-migrations/ipfs-0-to-1/migration"
1212
mg1 "github.com/ipfs/fs-repo-migrations/ipfs-1-to-2/migration"
13-
homedir "github.com/ipfs/fs-repo-migrations/ipfs-2-to-3/Godeps/_workspace/src/github.com/mitchellh/go-homedir"
1413
mg2 "github.com/ipfs/fs-repo-migrations/ipfs-2-to-3/migration"
1514
mg3 "github.com/ipfs/fs-repo-migrations/ipfs-3-to-4/migration"
1615
mg4 "github.com/ipfs/fs-repo-migrations/ipfs-4-to-5/migration"
1716
mg5 "github.com/ipfs/fs-repo-migrations/ipfs-5-to-6/migration"
1817
mg6 "github.com/ipfs/fs-repo-migrations/ipfs-6-to-7/migration"
18+
mg7 "github.com/ipfs/fs-repo-migrations/ipfs-7-to-8/migration"
19+
mg8 "github.com/ipfs/fs-repo-migrations/ipfs-8-to-9/migration"
1920
mfsr "github.com/ipfs/fs-repo-migrations/mfsr"
21+
homedir "github.com/mitchellh/go-homedir"
2022
)
2123

22-
var CurrentVersion = 7
24+
var CurrentVersion = 9
2325

2426
var migrations = []gomigrate.Migration{
2527
&mg0.Migration{},
@@ -29,6 +31,8 @@ var migrations = []gomigrate.Migration{
2931
&mg4.Migration{},
3032
&mg5.Migration{},
3133
&mg6.Migration{},
34+
&mg7.Migration{}, // update bootstrappers
35+
&mg8.Migration{}, // converts CIDv1s to raw multihashes
3236
}
3337

3438
func GetIpfsDir() (string, error) {

0 commit comments

Comments
 (0)