Skip to content

Commit 1a143d1

Browse files
committed
style: fix linting, typo and outdated code
Signed-off-by: Wallas Santos <[email protected]>
1 parent bc78d7d commit 1a143d1

File tree

2 files changed

+9
-14
lines changed

2 files changed

+9
-14
lines changed

vllm_spyre/v1/worker/spyre_model_runner.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ def _prepare_pad_input_ids(
144144
seq_len = input_ids_i.size(0)
145145
if max_len > seq_len:
146146
logger.info(
147-
"Left padding request ofla length %d tokens to %d tokens.",
147+
"Left padding request of length %d tokens to %d tokens.",
148148
seq_len, max_len)
149149
pads = torch.ones(max_len - seq_len,
150150
dtype=torch.long,
@@ -220,9 +220,7 @@ def complete_warmup(self):
220220
"""Turn off warmup mode once the warmup is complete"""
221221
self.warmup_mode = False
222222

223-
def _update_states(
224-
self,
225-
scheduler_output: SchedulerOutput):
223+
def _update_states(self, scheduler_output: SchedulerOutput):
226224
# Update the states of the running/resumed requests.
227225
# Update input_batch's `token_ids_cpu`,
228226
# `num_tokens`. For continuous batching it cleans
@@ -587,9 +585,9 @@ def __init__(
587585
# TODO: Remove this once we can prefill and decode
588586
# in the same step
589587
self.prefill_batch = InputBatch(
590-
# TODO: review this, currently we only support prefill for
588+
# TODO: review this, currently we only support prefill for
591589
# `batch_size=1`
592-
max_num_reqs=1,
590+
max_num_reqs=1,
593591
max_model_len=vllm_config.model_config.max_model_len,
594592
device=self.device,
595593
pin_memory=self.pin_memory,
@@ -598,11 +596,11 @@ def __init__(
598596

599597
# Requests
600598
self.requests: dict[str, CachedRequestData] = {}
601-
599+
602600
def _update_states(self, scheduler_output):
603-
601+
604602
super()._update_states(scheduler_output)
605-
603+
606604
# Continuous batching stuff
607605
for req_id in scheduler_output.finished_req_ids:
608606
if req_id in self.req_ids2blocks:
@@ -611,7 +609,7 @@ def _update_states(self, scheduler_output):
611609
self.free_blocks.append(freed_block)
612610
del self.req_ids2blocks[req_id]
613611
del self.req_ids2left_pads[req_id]
614-
612+
615613
[self.input_batch.remove_request(req_id) \
616614
for req_id in scheduler_output.finished_req_ids]
617615

@@ -636,9 +634,6 @@ def _prepare_prompt(
636634
# Internal state is managed here.
637635
slot_mapping = []
638636

639-
# TODO: we are deactivating all, because we
640-
# only encode or prefill at time.
641-
# self.input_batch.deactivate_all_requests()
642637
self.prefill_batch.clear_requests()
643638

644639
for request_data in new_requests:

vllm_spyre/v1/worker/spyre_worker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,7 @@ def _warmup_spyre_dynamic_size(self, special_token_ids):
356356
scheduled_cached_reqs=[],
357357
num_scheduled_tokens={},
358358
# NOTE: this means no work to do
359-
total_num_scheduled_tokens=0,
359+
total_num_scheduled_tokens=0,
360360
scheduled_spec_decode_tokens={},
361361
scheduled_encoder_inputs={},
362362
num_common_prefix_blocks=0,

0 commit comments

Comments (0)