@@ -107,34 +107,20 @@ def cache_full_blocks(
107
107
assert prev_block .block_hash is not None
108
108
prev_block_hash_value = prev_block .block_hash .hash_value
109
109
110
- # Find the first uncached block.
111
- # FIXME: num_cached_blocks should be corrected by the caller
112
- # so this should never happen.
113
- offset = 0
114
- for blk in new_full_blocks :
115
- if blk .block_hash is None :
116
- break
117
- else :
118
- prev_block_hash_value = blk .block_hash .hash_value
119
- offset += 1
120
- else :
121
- # All blocks are cached.
122
- return
123
-
124
- for i , blk in enumerate (new_full_blocks [offset :]):
125
- blk_idx = num_cached_blocks + offset + i
110
+ for i , blk in enumerate (new_full_blocks ):
126
111
assert blk .block_hash is None
127
112
128
- if i + offset < len (new_block_hashes ):
113
+ if i < len (new_block_hashes ):
129
114
# The block hash may already be computed in
130
115
# "get_computed_blocks" if the tokens are not generated by
131
116
# this request (either the prompt tokens or the previously
132
117
# generated tokens with preemption). In this case we simply
133
118
# reuse the block hash.
134
- block_hash = new_block_hashes [i + offset ]
119
+ block_hash = new_block_hashes [i ]
135
120
else :
136
121
# Otherwise compute the block hash and cache it in the request
137
122
# in case it will be preempted in the future.
123
+ blk_idx = num_cached_blocks + i
138
124
start_token_idx = blk_idx * block_size
139
125
end_token_idx = (blk_idx + 1 ) * block_size
140
126
block_tokens = request .all_token_ids [
0 commit comments