diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index bb42b5f29a72..4791a2dd4ed4 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -199,8 +199,6 @@ steps: - vllm/spec_decode - tests/spec_decode commands: - # See https://github.com/vllm-project/vllm/issues/5152 - - export VLLM_ATTENTION_BACKEND=XFORMERS - pytest -v -s spec_decode/e2e/test_multistep_correctness.py - pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py diff --git a/tests/spec_decode/test_multi_step_worker.py b/tests/spec_decode/test_multi_step_worker.py index e7a0af437763..6fa386ffab12 100644 --- a/tests/spec_decode/test_multi_step_worker.py +++ b/tests/spec_decode/test_multi_step_worker.py @@ -673,7 +673,10 @@ def test_use_draft_model_runner_advance_step(): worker.model_runner._gpu_advance_step.side_effect = ValueError( exception_secret) - seq_group_metadata_list, _, _ = create_batch(batch_size, k) + seq_group_metadata_list, _, _ = create_batch(batch_size, + k, + block_size=block_size, + num_gpu_blocks=num_gpu_blocks) # Fallback (should not call) when num_steps=1. execute_model_req = ExecuteModelRequest(