     "strings"
     "sync"
     "testing"
+    "time"
 
     cid "github.com/ipfs/go-cid"
     ipld "github.com/ipfs/go-ipld-format"
@@ -328,13 +329,16 @@ func TestHAMTEnumerationWhenComputingSize(t *testing.T) {
     oldHashFunc := hamt.HAMTHashFunction
     defer func() { hamt.HAMTHashFunction = oldHashFunc }()
     hamt.HAMTHashFunction = hamt.IdHash
-    //oldShardWidth := DefaultShardWidth
-    //defer func() { DefaultShardWidth = oldShardWidth }()
-    //DefaultShardWidth = 8
-    // FIXME: We should be able to use a smaller DefaultShardWidth to have
-    // a deeper tree and cheaper tests once the import cycle is resolved
-    // in hamt.CreateCompleteHAMT and the DefaultShardWidth value is not
-    // hardcoded there.
+    oldShardWidth := DefaultShardWidth
+    defer func() { DefaultShardWidth = oldShardWidth }()
+    DefaultShardWidth = 16 // FIXME: Review number. From 256 to 16, or 8
+    // (if we fix CreateCompleteHAMT).
+
+    // FIXME: Taken from the private default in github.com/ipfs/go-merkledag/merkledag.go.
+    // We could also pass an explicit concurrency value to `(*Shard).EnumLinksAsync()`
+    // and take ownership of this configuration, but that would depart from the
+    // more standard and reliable one in `go-merkledag`.
+    defaultConcurrentFetch := 32
 
     // We create a "complete" HAMT (see CreateCompleteHAMT for more details)
     // with a regular structure to be able to predict how many Shard nodes we
@@ -343,34 +347,52 @@ func TestHAMTEnumerationWhenComputingSize(t *testing.T) {
     oldHamtOption := HAMTShardingSize
     defer func() { HAMTShardingSize = oldHamtOption }()
     // (Some arbitrary values below that make this test not that expensive.)
-    treeHeight := 2
-    thresholdToWidthRatio := 4 // How many leaf shards nodes (with value links,
+    treeHeight := 3 // FIXME: Review number. Increased from 2 to 3.
+    // How many leaf shard nodes (with value links,
     // i.e., directory entries) do we need to reach the threshold.
+    thresholdToWidthRatio := 4
+    // FIXME: Review the dag.Walk algorithm to better figure out this estimate.
+
     HAMTShardingSize = DefaultShardWidth * thresholdToWidthRatio
-    // With this structure we will then need to fetch the following nodes:
+    // With this structure and a BFS traversal (from `parallelWalkDepth`) we
+    // would roughly fetch the following nodes:
+    nodesToFetch := 0
+    // * all layers up to (but not including) the last one with leaf nodes
+    //   (because it's a BFS)
+    for i := 0; i < treeHeight; i++ {
+        nodesToFetch += int(math.Pow(float64(DefaultShardWidth), float64(i)))
+    }
     // * `thresholdToWidthRatio` leaf Shards with enough value links to reach
     // the HAMTShardingSize threshold.
-    // * `(treeHeight - 1)` internal nodes to reach those leaf Shard nodes
-    // (assuming we have thresholdToWidthRatio below the DefaultShardWidth,
-    // i.e., all leaf nodes come from the same parent).
-    nodesToFetch := thresholdToWidthRatio + treeHeight - 1
+    nodesToFetch += thresholdToWidthRatio
+    // * `defaultConcurrentFetch` potential extra nodes fetched by the threads
+    //   working in parallel
+    nodesToFetch += defaultConcurrentFetch
+
     ds := mdtest.Mock()
-    completeHAMTRoot, err := hamt.CreateCompleteHAMT(ds, treeHeight)
+    completeHAMTRoot, err := hamt.CreateCompleteHAMT(ds, treeHeight, DefaultShardWidth)
     assert.NoError(t, err)
 
     countGetsDS := newCountGetsDS(ds)
     hamtDir, err := newHAMTDirectoryFromNode(countGetsDS, completeHAMTRoot)
     assert.NoError(t, err)
 
     countGetsDS.resetCounter()
+    countGetsDS.setRequestDelay(10 * time.Millisecond)
     // FIXME: Only works with sequential DAG walk (now hardcoded, needs to be
     // added to the internal API) where we can predict the Get requests and
     // tree traversal. It would be desirable to have some test for the concurrent
     // walk (which is the one used in production).
     below, err := hamtDir.sizeBelowThreshold(context.TODO(), 0)
     assert.NoError(t, err)
     assert.False(t, below)
-    assert.Equal(t, nodesToFetch, countGetsDS.uniqueCidsFetched())
+    t.Logf("fetched %d/%d nodes", countGetsDS.uniqueCidsFetched(), nodesToFetch)
+    assert.True(t, countGetsDS.uniqueCidsFetched() <= nodesToFetch)
+    assert.True(t, countGetsDS.uniqueCidsFetched() >= nodesToFetch - defaultConcurrentFetch)
+    // (Without the `setRequestDelay` above the number of nodes fetched
+    // drops dramatically and unpredictably as the BFS starts to behave
+    // more like a DFS because some search paths are fetched faster than
+    // others.)
 }
 
 // Compare entries in the leftDir against the rightDir and possibly
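For reference, the estimate the new assertions rely on works out as follows with the values hardcoded in this test (DefaultShardWidth = 16, treeHeight = 3, thresholdToWidthRatio = 4, defaultConcurrentFetch = 32). The standalone sketch below simply replays that arithmetic outside the test; the names are illustrative and it is not part of the patch:

package main

import (
    "fmt"
    "math"
)

func main() {
    // Values hardcoded in the test above.
    shardWidth := 16           // DefaultShardWidth after the override
    treeHeight := 3            // depth of the "complete" HAMT
    thresholdToWidthRatio := 4 // leaf shards needed to cross HAMTShardingSize
    concurrentFetch := 32      // workers in the parallel BFS walk

    // Per the comment in the test: all layers up to (but not including) the
    // last one with leaf entries, i.e. 16^0 + 16^1 + 16^2 = 273 shards.
    expected := 0
    for i := 0; i < treeHeight; i++ {
        expected += int(math.Pow(float64(shardWidth), float64(i)))
    }

    // Plus the leaf shards needed to reach the size threshold.
    expected += thresholdToWidthRatio // 273 + 4 = 277

    // The walk may have up to one extra node in flight per worker, which is
    // why the test asserts a range instead of an exact count.
    fmt.Printf("expected unique CIDs fetched: between %d and %d\n",
        expected, expected+concurrentFetch) // between 277 and 309
}

Running it prints a range of 277 to 309 unique CIDs, which is exactly the window the two assert.True checks above allow.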
@@ -519,6 +541,8 @@ type countGetsDS struct {
 
     cidsFetched map[cid.Cid]struct{}
     mapLock     sync.Mutex
+
+    getRequestDelay time.Duration
 }
 
 var _ ipld.DAGService = (*countGetsDS)(nil)
@@ -528,6 +552,7 @@ func newCountGetsDS(ds ipld.DAGService) *countGetsDS {
         ds,
         make(map[cid.Cid]struct{}),
         sync.Mutex{},
+        0,
     }
 }
 
@@ -543,30 +568,31 @@ func (d *countGetsDS) uniqueCidsFetched() int {
     return len(d.cidsFetched)
 }
 
+func (d *countGetsDS) setRequestDelay(timeout time.Duration) {
+    d.getRequestDelay = timeout
+}
+
 func (d *countGetsDS) Get(ctx context.Context, c cid.Cid) (ipld.Node, error) {
     node, err := d.DAGService.Get(ctx, c)
     if err != nil {
         return nil, err
     }
 
     d.mapLock.Lock()
+    _, cidRequestedBefore := d.cidsFetched[c]
     d.cidsFetched[c] = struct{}{}
     d.mapLock.Unlock()
 
+    if d.getRequestDelay != 0 && !cidRequestedBefore {
+        // The first request gets a delay to simulate a network fetch.
+        // Subsequent requests get no delay, simulating an on-disk cache.
+        time.Sleep(d.getRequestDelay)
+    }
+
     return node, nil
 }
 
 // Process sequentially (blocking) calling Get which tracks requests.
 func (d *countGetsDS) GetMany(ctx context.Context, cids []cid.Cid) <-chan *ipld.NodeOption {
-    out := make(chan *ipld.NodeOption, len(cids))
-    defer close(out)
-    for _, c := range cids {
-        node, err := d.Get(ctx, c)
-        if err != nil {
-            out <- &ipld.NodeOption{Err: err}
-            break
-        }
-        out <- &ipld.NodeOption{Node: node}
-    }
-    return out
+    panic("GetMany not supported")
 }
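The delay wiring is what makes the bounds above meaningful: only the first Get for a given CID sleeps (a simulated network round trip), so the parallel BFS advances roughly level by level instead of racing ahead on whichever branch resolves first. Below is a minimal, self-contained sketch of that same pattern, using a hypothetical delayedFetcher over plain string keys instead of the real ipld.DAGService and assuming only the standard library; it is an illustration, not code from the patch:

package main

import (
    "fmt"
    "sync"
    "time"
)

// delayedFetcher mimics the countGetsDS behaviour on plain string keys:
// the first request for a key sleeps (the "network" fetch), repeats do not.
type delayedFetcher struct {
    mu    sync.Mutex
    seen  map[string]struct{}
    delay time.Duration
}

func (f *delayedFetcher) fetch(key string) string {
    f.mu.Lock()
    _, seenBefore := f.seen[key]
    f.seen[key] = struct{}{}
    f.mu.Unlock()

    if f.delay != 0 && !seenBefore {
        time.Sleep(f.delay) // simulated network round trip
    }
    return "value-for-" + key
}

func main() {
    f := &delayedFetcher{
        seen:  make(map[string]struct{}),
        delay: 10 * time.Millisecond, // same delay the test uses
    }

    start := time.Now()
    f.fetch("a") // slow: first request for this key
    fmt.Println("first fetch:", time.Since(start))

    start = time.Now()
    f.fetch("a") // fast: behaves like a local cache hit
    fmt.Println("repeat fetch:", time.Since(start))
}

The repeat fetch returns almost immediately while the first takes at least the configured delay, mirroring how countGetsDS models a network fetch followed by cache hits.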