Skip to content
This repository was archived by the owner on Jun 27, 2023. It is now read-only.

Commit 4568234

Browse files
committed
WIP
1 parent efd7822 commit 4568234

File tree

5 files changed

+498
-125
lines changed

5 files changed

+498
-125
lines changed

hamt/hamt.go

Lines changed: 105 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,25 +37,41 @@ const (
3737
HashMurmur3 uint64 = 0x22
3838
)
3939

40-
func (ds *Shard) isValueNode() bool {
40+
// Hash function declared as global variable only for testing purposes.
41+
// FIXME: We should have a cleaner way to replace this during tests.
42+
var HAMTHashFunction = murmur3Hash
43+
44+
// IsValueNode reports whether this Shard has both a non-empty key and a
// non-nil value link set.
func (ds *Shard) IsValueNode() bool {
	if ds.key == "" {
		return false
	}
	return ds.val != nil
}
4347

4448
// A Shard represents the HAMT. It should be initialized with NewShard().
4549
type Shard struct {
4650
childer *childer
4751

48-
tableSize int
52+
// Entries per node (number of possible children indexed by the partial key).
53+
tableSize int
54+
// Bits needed to encode child indexes (log2 of number of entries). This is
55+
// the number of bits taken from the hash key on each level of the tree.
4956
tableSizeLg2 int
5057

5158
builder cid.Builder
5259
hashFunc uint64
5360

61+
// String format with number of zeros that will be present in the hexadecimal
62+
// encoding of the child index to always reach the fixed maxpadlen chars.
63+
// Example: maxpadlen = 4 => prefixPadStr: "%04X" (print number in hexadecimal
64+
// format padding with zeros to always reach 4 characters).
5465
prefixPadStr string
55-
maxpadlen int
66+
// Length in chars of string that encodes child indexes. We encode indexes
67+
// as hexadecimal strings so this is log16 of the number of entries.
68+
maxpadlen int
5669

5770
dserv ipld.DAGService
5871

72+
// FIXME: Remove. We don't actually store "value nodes". This confusing
73+
// abstraction just removes the maxpadlen from the link names to extract
74+
// the actual value link the trie is storing.
5975
// leaf node
6076
key string
6177
val *ipld.Link
@@ -68,6 +84,7 @@ func NewShard(dserv ipld.DAGService, size int) (*Shard, error) {
6884
return nil, err
6985
}
7086

87+
// FIXME: Make this at least a static configuration for testing.
7188
ds.hashFunc = HashMurmur3
7289
return ds, nil
7390
}
@@ -211,7 +228,7 @@ func (ds *Shard) Set(ctx context.Context, name string, nd ipld.Node) error {
211228
// name key in this Shard or its children. It also returns the previous link
212229
// under that name key (if any).
213230
func (ds *Shard) SetAndPrevious(ctx context.Context, name string, node ipld.Node) (*ipld.Link, error) {
214-
hv := &hashBits{b: hash([]byte(name))}
231+
hv := newHashBits(name)
215232
err := ds.dserv.Add(ctx, node)
216233
if err != nil {
217234
return nil, err
@@ -221,6 +238,9 @@ func (ds *Shard) SetAndPrevious(ctx context.Context, name string, node ipld.Node
221238
if err != nil {
222239
return nil, err
223240
}
241+
242+
// FIXME: We don't need to set the name here, it will get overwritten.
243+
// This is confusing, confirm and remove this line.
224244
lnk.Name = ds.linkNamePrefix(0) + name
225245

226246
return ds.setValue(ctx, hv, name, lnk)
@@ -236,13 +256,13 @@ func (ds *Shard) Remove(ctx context.Context, name string) error {
236256
// RemoveAndPrevious is similar to the public Remove but also returns the
237257
// old removed link (if it exists).
238258
func (ds *Shard) RemoveAndPrevious(ctx context.Context, name string) (*ipld.Link, error) {
	// Removal is expressed as a setValue call with a nil link for this name.
	return ds.setValue(ctx, newHashBits(name), name, nil)
}
242262

243263
// Find searches for a child node by 'name' within this hamt
244264
func (ds *Shard) Find(ctx context.Context, name string) (*ipld.Link, error) {
245-
hv := &hashBits{b: hash([]byte(name))}
265+
hv := newHashBits(name)
246266

247267
var out *ipld.Link
248268
err := ds.getValue(ctx, hv, name, func(sv *Shard) error {
@@ -276,7 +296,7 @@ func (ds *Shard) childLinkType(lnk *ipld.Link) (linkType, error) {
276296

277297
// Link returns a merklelink to this shard node
278298
func (ds *Shard) Link() (*ipld.Link, error) {
279-
if ds.isValueNode() {
299+
if ds.IsValueNode() {
280300
return ds.val, nil
281301
}
282302

@@ -305,7 +325,7 @@ func (ds *Shard) getValue(ctx context.Context, hv *hashBits, key string, cb func
305325
return err
306326
}
307327

308-
if child.isValueNode() {
328+
if child.IsValueNode() {
309329
if child.key == key {
310330
return cb(child)
311331
}
@@ -332,6 +352,21 @@ func (ds *Shard) EnumLinks(ctx context.Context) ([]*ipld.Link, error) {
332352
return links, nil
333353
}
334354

355+
// FIXME: Check which functions do we need to actually expose.
356+
func (ds *Shard) EnumAll(ctx context.Context) ([]*ipld.Link, error) {
357+
var links []*ipld.Link
358+
359+
linkResults := ds.EnumAllAsync(ctx)
360+
361+
for linkResult := range linkResults {
362+
if linkResult.Err != nil {
363+
return links, linkResult.Err
364+
}
365+
links = append(links, linkResult.Link)
366+
}
367+
return links, nil
368+
}
369+
335370
// ForEachLink walks the Shard and calls the given function.
336371
func (ds *Shard) ForEachLink(ctx context.Context, f func(*ipld.Link) error) error {
337372
return ds.walkTrie(ctx, func(sv *Shard) error {
@@ -345,6 +380,31 @@ func (ds *Shard) ForEachLink(ctx context.Context, f func(*ipld.Link) error) erro
345380
// EnumLinksAsync returns a channel which will receive Links in the directory
346381
// as they are enumerated, where order is not guaranteed
347382
func (ds *Shard) EnumLinksAsync(ctx context.Context) <-chan format.LinkResult {
383+
linkResults := make(chan format.LinkResult)
384+
ctx, cancel := context.WithCancel(ctx)
385+
go func() {
386+
defer close(linkResults)
387+
defer cancel()
388+
getLinks := makeAsyncTrieGetLinks(ds.dserv, linkResults)
389+
cset := cid.NewSet()
390+
rootNode, err := ds.Node()
391+
if err != nil {
392+
emitResult(ctx, linkResults, format.LinkResult{Link: nil, Err: err})
393+
return
394+
}
395+
// FIXME: Make concurrency an option for testing.
396+
//err := dag.Walk(ctx, getLinks, ds.cid, cset.Visit, dag.Concurrent())
397+
err = dag.Walk(ctx, getLinks, rootNode.Cid(), cset.Visit)
398+
if err != nil {
399+
emitResult(ctx, linkResults, format.LinkResult{Link: nil, Err: err})
400+
}
401+
}()
402+
return linkResults
403+
}
404+
405+
// EnumAllAsync returns a channel which will receive Links in the directory
406+
// as they are enumerated, where order is not guaranteed
407+
func (ds *Shard) EnumAllAsync(ctx context.Context) <-chan format.LinkResult {
348408
linkResults := make(chan format.LinkResult)
349409
ctx, cancel := context.WithCancel(ctx)
350410
go func() {
@@ -405,6 +465,39 @@ func makeAsyncTrieGetLinks(dagService ipld.DAGService, linkResults chan<- format
405465
}
406466
}
407467

468+
//// same as makeAsyncTrieGetLinks but return all
469+
//// FIXME: Check how to abstract this.
470+
//func makeAsyncTrieGetAll(dagService ipld.DAGService, linkResults chan<- format.LinkResult) dag.GetLinks {
471+
//
472+
// return func(ctx context.Context, currentCid cid.Cid) ([]*ipld.Link, error) {
473+
// node, err := dagService.Get(ctx, currentCid)
474+
// if err != nil {
475+
// return nil, err
476+
// }
477+
// directoryShard, err := NewHamtFromDag(dagService, node)
478+
// if err != nil {
479+
// return nil, err
480+
// }
481+
//
482+
// childShards := make([]*ipld.Link, 0, directoryShard.childer.length())
483+
// links := directoryShard.childer.links
484+
// for idx := range directoryShard.childer.children {
485+
// lnk := links[idx]
486+
// // We don't care about the link type (shard or value), just count
487+
// // *all* nodes in this HAMT.
488+
// emitResult(ctx, linkResults, format.LinkResult{Link: lnk, Err: nil})
489+
// lnkLinkType, err := directoryShard.childLinkType(lnk)
490+
// if err != nil {
491+
// return nil, err
492+
// }
493+
// if lnkLinkType == shardLink {
494+
// childShards = append(childShards, lnk)
495+
// }
496+
// }
497+
// return childShards, nil
498+
// }
499+
//}
500+
408501
func emitResult(ctx context.Context, linkResults chan<- format.LinkResult, r format.LinkResult) {
409502
// make sure that context cancel is processed first
410503
// the reason is due to the concurrency of EnumerateChildrenAsync
@@ -423,7 +516,7 @@ func emitResult(ctx context.Context, linkResults chan<- format.LinkResult, r for
423516

424517
func (ds *Shard) walkTrie(ctx context.Context, cb func(*Shard) error) error {
425518
return ds.childer.each(ctx, func(s *Shard) error {
426-
if s.isValueNode() {
519+
if s.IsValueNode() {
427520
if err := cb(s); err != nil {
428521
return err
429522
}
@@ -455,7 +548,7 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value *
455548
return
456549
}
457550

458-
if child.isValueNode() {
551+
if child.IsValueNode() {
459552
// Leaf node. This is the base case of this recursive function.
460553
if child.key == key {
461554
// We are in the correct shard (tree level) so we modify this child
@@ -489,10 +582,7 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value *
489582
return nil, err
490583
}
491584
child.builder = ds.builder
492-
chhv := &hashBits{
493-
b: hash([]byte(grandChild.key)),
494-
consumed: hv.consumed,
495-
}
585+
chhv := newConsumedHashBits(grandChild.key, hv.consumed)
496586

497587
// We explicitly ignore the oldValue returned by the next two insertions
498588
// (which will be nil) to highlight there is no overwrite here: they are
@@ -536,7 +626,7 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value *
536626
// Have we loaded the child? Prefer that.
537627
schild := child.childer.child(0)
538628
if schild != nil {
539-
if schild.isValueNode() {
629+
if schild.IsValueNode() {
540630
ds.childer.set(schild, i)
541631
}
542632
return

hamt/util.go

Lines changed: 82 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
package hamt
22

33
import (
4+
"context"
5+
"encoding/binary"
46
"fmt"
5-
7+
ipld "github.com/ipfs/go-ipld-format"
8+
"github.com/ipfs/go-unixfs"
69
"github.com/spaolacci/murmur3"
10+
"math"
711
"math/bits"
812
)
913

@@ -13,6 +17,16 @@ type hashBits struct {
1317
consumed int
1418
}
1519

20+
func newHashBits(val string) *hashBits {
21+
return &hashBits{b: HAMTHashFunction([]byte(val))}
22+
}
23+
24+
func newConsumedHashBits(val string, consumed int) *hashBits {
25+
hv := &hashBits{b: HAMTHashFunction([]byte(val))}
26+
hv.consumed = consumed
27+
return hv
28+
}
29+
1630
// mkmask returns a byte with its n lowest bits set, for 0 <= n <= 8.
func mkmask(n int) byte {
	// The shift is done on a byte, so n == 8 wraps to 0 and the subtraction
	// then yields 0xFF.
	var one byte = 1
	return one<<uint(n) - 1
}
@@ -61,8 +75,74 @@ func logtwo(v int) (int, error) {
6175
return lg2, nil
6276
}
6377

64-
func hash(val []byte) []byte {
78+
func murmur3Hash(val []byte) []byte {
6579
h := murmur3.New64()
6680
h.Write(val)
6781
return h.Sum(nil)
6882
}
83+
84+
// IdHash is an identity "hash": it returns val itself (the same slice, not a
// copy). ONLY FOR TESTING.
func IdHash(val []byte) []byte {
	return val
}
88+
89+
// CreateCompleteHAMT creates a HAMT the following properties:
90+
// * its height (distance/edges from root to deepest node) is specified by treeHeight.
91+
// * all leaf Shard nodes have the same depth (and have only 'value' links).
92+
// * all internal Shard nodes point only to other Shards (and hence have zero 'value' links).
93+
// * the total number of 'value' links (directory entries) is:
94+
// io.DefaultShardWidth ^ treeHeight.
95+
// FIXME: HAMTHashFunction needs to be set to IdHash by the caller. We depend on
96+
// this simplification for the current logic to work. (HAMTHashFunction is a
97+
// global setting of the package, it is hard-coded in the serialized Shard node
98+
// and not allowed to be changed on a per HAMT/Shard basis.)
99+
// (If we didn't rehash inside setValue then we could just generate
100+
// the fake hash as in io.SetAndPrevious through `newHashBits()` and pass
101+
// it as an argument making the hash independent of tree manipulation; that
102+
// sounds as the correct way to go in general and we wouldn't need this.)
103+
func CreateCompleteHAMT(ds ipld.DAGService, treeHeight int) (ipld.Node, error) {
104+
if treeHeight < 1 {
105+
panic("treeHeight < 1")
106+
}
107+
if treeHeight > 8 {
108+
panic("treeHeight > 8: we don't allow a key larger than what can be enconded in a 64-bit word")
109+
}
110+
//if HAMTHashFunction != IdHash {
111+
// panic("we do not support a hash function other than ID")
112+
//}
113+
// FIXME: Any clean and simple way to do this? Otherwise remove check.
114+
115+
//childsPerNode := io.DefaultShardWidth
116+
childsPerNode := 256 // (FIXME: hard-coded as we have an 'import cycle not
117+
// allowed' error from io package otherwise.)
118+
// FIXME: Evaluate making this an argument.
119+
120+
rootShard, err := NewShard(ds, childsPerNode)
121+
if err != nil {
122+
return nil, err
123+
}
124+
// FIXME: Do we need to set the CID builder? Not part of the NewShard
125+
// interface so it shouldn't be mandatory.
126+
127+
// Assuming we are using the ID hash function we can just insert all
128+
// the combinations of a byte slice that will reach the desired height.
129+
totalChildren := int(math.Pow(float64(childsPerNode), float64(treeHeight)))
130+
for i := 0; i < totalChildren; i++ {
131+
var hashbuf [8]byte
132+
binary.LittleEndian.PutUint64(hashbuf[:], uint64(i))
133+
var oldLink *ipld.Link
134+
oldLink, err = rootShard.SetAndPrevious(context.Background(), string(hashbuf[:treeHeight]), unixfs.EmptyFileNode())
135+
if err != nil {
136+
return nil, err
137+
}
138+
if oldLink != nil {
139+
// We shouldn't be overwriting any value, otherwise the tree
140+
// won't be complete.
141+
return nil, fmt.Errorf("we have overwritten entry %s",
142+
oldLink.Cid)
143+
}
144+
}
145+
// FIXME: Check depth of every Shard to be sure?
146+
147+
return rootShard.Node()
148+
}

hamt/util_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
package hamt
22

33
import (
4+
"context"
5+
mdtest "github.com/ipfs/go-merkledag/test"
6+
"github.com/stretchr/testify/assert"
7+
"math"
48
"testing"
59
)
610

@@ -62,3 +66,23 @@ func TestHashBitsUneven(t *testing.T) {
6266
t.Fatalf("expected 20269, but got %b (%d)", v, v)
6367
}
6468
}
69+
70+
func TestCreateCompleteShard(t *testing.T) {
71+
ds := mdtest.Mock()
72+
childsPerNode := 256
73+
treeHeight := 2 // This is the limit of what we can fastly generate,
74+
// the default width is too big (256). We may need to refine
75+
// CreateCompleteHAMT encoding of the key to reduce the tableSize.
76+
node, err := CreateCompleteHAMT(ds, treeHeight)
77+
assert.NoError(t, err)
78+
79+
shard, err := NewHamtFromDag(ds, node)
80+
assert.NoError(t, err)
81+
links, err := shard.EnumAll(context.Background())
82+
assert.NoError(t, err)
83+
84+
childNodes := int(math.Pow(float64(childsPerNode), float64(treeHeight)))
85+
//internalNodes := int(math.Pow(float64(childsPerNode), float64(treeHeight-1)))
86+
//totalNodes := childNodes + internalNodes
87+
assert.Equal(t, childNodes, len(links))
88+
}

0 commit comments

Comments
 (0)