@@ -431,12 +431,12 @@ void WeightsPtrs::CopyFrom(const WeightsPtrs& other) {
 void WeightsPtrs::Fixup(std::vector<MatOwner>& mat_owners,
                         ThreadingContext& ctx) {
   const size_t cluster_idx = 0;
-  ParallelFor(ParallelismStrategy::kFlat, c_layers.size(), ctx, cluster_idx,
+  ParallelFor(Parallelism::kFlat, c_layers.size(), ctx, cluster_idx,
               Callers::kFixupWeights, [&](uint64_t layer, size_t /*worker*/) {
                 GetLayer(layer)->Fixup(mat_owners, ctx);
               });
 
-  ParallelFor(ParallelismStrategy::kFlat, vit_layers.size(), ctx, cluster_idx,
+  ParallelFor(Parallelism::kFlat, vit_layers.size(), ctx, cluster_idx,
               Callers::kFixupWeights, [&](uint64_t layer, size_t /*worker*/) {
                 VitLayer(layer)->Fixup(mat_owners, ctx);
               });
@@ -527,7 +527,7 @@ static void AllocateAndBindAll(std::vector<TensorToRead>& tensors,
 
   // Allocate in parallel because faulting in large tensors is slow.
   ParallelFor(
-      ParallelismStrategy::kFlat, tensors.size(), ctx, /*cluster_idx=*/0,
+      Parallelism::kFlat, tensors.size(), ctx, /*cluster_idx=*/0,
       Callers::kAllocateAndBindAll, [&](uint64_t task, size_t /*thread*/) {
         TensorToRead& tensor = tensors[task];
         MatPtr& mat = *tensor.mat;
@@ -586,10 +586,9 @@ static void DecompressToBF16(MatPtr& mat,
 static void ReadAllToBF16(const std::vector<TensorToRead>& tensors,
                           const BlobReader& reader, ThreadingContext& ctx) {
   // Especially TSAN is slow enough to warrant hierarchical parallelism.
-  const ParallelismStrategy strategy = HWY_IS_DEBUG_BUILD
-                                           ? ParallelismStrategy::kHierarchical
-                                           : ParallelismStrategy::kFlat;
-  ParallelFor(strategy, tensors.size(), ctx, /*cluster_idx=*/0,
+  const Parallelism parallelism =
+      HWY_IS_DEBUG_BUILD ? Parallelism::kHierarchical : Parallelism::kFlat;
+  ParallelFor(parallelism, tensors.size(), ctx, /*cluster_idx=*/0,
               Callers::kReadAllToBF16, [&](uint64_t task, size_t thread) {
                 GCPP_ZONE(ctx, thread, Zones::kStartupWeightsReadAllToBF16);
                 const TensorToRead& tensor = tensors[task];
@@ -677,7 +676,7 @@ static void ReadBatches(const BlobReader& reader,
                         const std::vector<IOBatch>& batches,
                         ThreadingContext& ctx) {
   // >5x speedup from parallel reads when cached.
-  ParallelFor(ParallelismStrategy::kHierarchical, batches.size(), ctx,
+  ParallelFor(Parallelism::kHierarchical, batches.size(), ctx,
               /*cluster_idx=*/0, Callers::kReadBatches,
               [&](uint64_t task, size_t thread) {
                 GCPP_ZONE(ctx, thread, Zones::kStartupWeightsReadBatches);
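Below is a minimal call-site sketch of the renamed API, for reference only. It assumes the ParallelFor overload, Parallelism enum, Callers, and ThreadingContext visible in the hunks above are in scope; num_tasks and DoWork are hypothetical placeholders, not identifiers from this commit.

  // Hypothetical usage (not part of this commit). The first argument selects
  // the strategy via the renamed Parallelism enum; as in ReadAllToBF16 above,
  // debug builds pick kHierarchical and release builds pick kFlat.
  const Parallelism parallelism =
      HWY_IS_DEBUG_BUILD ? Parallelism::kHierarchical : Parallelism::kFlat;
  ParallelFor(parallelism, num_tasks, ctx, /*cluster_idx=*/0,
              Callers::kReadBatches, [&](uint64_t task, size_t /*worker*/) {
                DoWork(task);  // invoked once per task on some worker thread
              });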