Skip to content

Commit 5fb35ad

Browse files
committed
Add workaround for requests that do not fit in warmup shapes
Revert "disable stats for test and warn users" This reverts commit 03cc587. Signed-off-by: Wallas Santos <[email protected]>
1 parent 03cc587 commit 5fb35ad

File tree

3 files changed

+18
-17
lines changed

3 files changed

+18
-17
lines changed

tests/spyre_util.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,6 @@ def __init__(self,
6464
env = os.environ.copy()
6565
if env_dict is not None:
6666
env.update(env_dict)
67-
68-
# TODO: Re-enable stats for vllm-spyre plugin
69-
# See: https://github.com/vllm-project/vllm-spyre/issues/68
70-
vllm_serve_args.append("--disable-log-stats")
7167
self.proc = subprocess.Popen(
7268
["vllm", "serve", model, *vllm_serve_args],
7369
env=env,

tests/test_spyre_online.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from tests.spyre_util import (RemoteOpenAIServer, get_spyre_backend_list,
55
get_spyre_model_list)
6+
from vllm_spyre.v1.core.scheduler import NO_WARMUP_FIT_STOP_REASON
67

78

89
@pytest.mark.parametrize("model", get_spyre_model_list())
@@ -71,4 +72,11 @@ def test_openai_serving(model, warmup_shape, backend, vllm_version):
7172
max_tokens=25)
7273

7374
assert len(completion.choices) == 1
74-
assert len(completion.choices[0].text) == 0
75+
76+
# TODO: V0 and V1 have slightly different behavior for requests
77+
# that do not fit in a warmup shape
78+
if vllm_version == 'V0':
79+
assert len(completion.choices[0].text) == 0
80+
elif vllm_version == 'V1':
81+
assert completion.choices[0].stop_reason == \
82+
NO_WARMUP_FIT_STOP_REASON

vllm_spyre/v1/core/scheduler.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222

2323
logger = init_logger(__name__)
2424

25+
NO_WARMUP_FIT_STOP_REASON = "Request did not fit any warmup shape"
26+
2527

2628
class SpyreScheduler(Scheduler):
2729
"""Small extension of the V1 scheduler that adds constraints for Sypre:
@@ -47,13 +49,6 @@ def __init__(self, *args, **kwargs) -> None:
4749

4850
self.rejected_requests: set[str] = set()
4951

50-
if self.log_stats:
51-
logger.warning_once(
52-
"Log stats for V1 is not working properly. Requests that do "
53-
"not fit in warmup shapes will crash the engine. "
54-
"Pass --disable-log-stats to disable stats and this message. "
55-
"See https://github.com/vllm-project/vllm-spyre/issues/68")
56-
5752
def add_request(self, request: Request) -> None:
5853
"""This override rejects requests that fit no warmup shape"""
5954
if len(
@@ -192,11 +187,13 @@ def _reject_from_queue(self,
192187
for request in rejected_requests:
193188
queue.remove(request)
194189
reject_outputs.append(
195-
EngineCoreOutput(request.request_id,
196-
new_token_ids=[],
197-
finish_reason=FinishReason.ABORT,
198-
stop_reason="Request did not fit any warmup "
199-
"shape"))
190+
EngineCoreOutput(
191+
request.request_id,
192+
# TODO: FIXME
193+
# Dummy token prevents a stats collection crash
194+
new_token_ids=[0],
195+
finish_reason=FinishReason.ABORT,
196+
stop_reason=NO_WARMUP_FIT_STOP_REASON))
200197
request.status = RequestStatus.FINISHED_ABORTED
201198
self._free_request(request)
202199
self.rejected_requests.remove(request.request_id)

0 commit comments

Comments
 (0)