@@ -496,8 +496,7 @@ def test_allocate_with_lookahead():
496496
497497    # Test case 1: Requires additional lookahead tokens 
498498    kv_cache_manager  =  KVCacheManager (kv_cache_config = config ,
499-                                       max_model_len = 100 ,
500-                                       num_preallocate_tokens = 0 )
499+                                       max_model_len = 100 )
501500    blocks  =  kv_cache_manager .allocate_slots (
502501        request ,
503502        num_tokens = 3 ,
@@ -507,25 +506,19 @@ def test_allocate_with_lookahead():
507506
508507    # Test case 2: With precomputed blocks 
509508    kv_cache_manager  =  KVCacheManager (kv_cache_config = config ,
510-                                       max_model_len = 100 ,
511-                                       num_preallocate_tokens = 4 )
512-     # num_preallocate_blocks = 4 // 4 - 2 // 4 = 1 
509+                                       max_model_len = 100 )
513510    # required_blocks = ceil((3 + 2) /4) = 2 
514-     # total_blocks = 1 + 2 = 3 
515511    blocks  =  kv_cache_manager .allocate_slots (
516512        request ,
517513        num_tokens = 3 ,
518514        num_lookahead_tokens = 2 ,
519515    )
520-     assert  len (blocks ) ==  3 
516+     assert  len (blocks ) ==  2 
521517
522518    # Test case 3: With precomputed blocks 
523-     # num_preallocate_blocks = 4 // 4 - 4 // 4 = 0 
524519    # required_blocks = ceil((3 + 4) / 4) = 2 
525-     # total_blocks = 0 + 2 = 2 
526520    kv_cache_manager  =  KVCacheManager (kv_cache_config = config ,
527-                                       max_model_len = 100 ,
528-                                       num_preallocate_tokens = 4 )
521+                                       max_model_len = 100 )
529522    blocks  =  kv_cache_manager .allocate_slots (
530523        request ,
531524        num_tokens = 3 ,
0 commit comments