|
36 | 36 | from vllm.logger import init_logger |
37 | 37 | from vllm.platforms import CpuArchEnum, current_platform |
38 | 38 | from vllm.plugins import load_general_plugins |
| 39 | +from vllm.ray.lazy_utils import is_ray_initialized |
39 | 40 | from vllm.reasoning import ReasoningParserManager |
40 | 41 | from vllm.test_utils import MODEL_WEIGHTS_S3_BUCKET, MODELS_ON_S3 |
41 | 42 | from vllm.transformers_utils.utils import check_gguf_file |
@@ -1099,6 +1100,15 @@ def create_engine_config( |
1099 | 1100 | kv_sharing_fast_prefill=self.kv_sharing_fast_prefill, |
1100 | 1101 | ) |
1101 | 1102 |
|
| 1103 | + ray_runtime_env = None |
| 1104 | + if is_ray_initialized(): |
| 1105 | + # Ray Serve LLM calls `create_engine_config` in the context |
| 1106 | + # of a Ray task; therefore we check `is_ray_initialized()` |
| 1107 | + # as opposed to `is_in_ray_actor()`. |
| 1108 | + import ray |
| 1109 | + ray_runtime_env = ray.get_runtime_context().runtime_env |
| 1110 | + logger.info("Using ray runtime env: %s", ray_runtime_env) |
| 1111 | + |
1102 | 1112 | # Get the current placement group if Ray is initialized and |
1103 | 1113 | # we are in a Ray actor. If so, then the placement group will be |
1104 | 1114 | # passed to spawned processes. |
@@ -1211,6 +1221,7 @@ def create_engine_config( |
1211 | 1221 | max_parallel_loading_workers=self.max_parallel_loading_workers, |
1212 | 1222 | disable_custom_all_reduce=self.disable_custom_all_reduce, |
1213 | 1223 | ray_workers_use_nsight=self.ray_workers_use_nsight, |
| 1224 | + ray_runtime_env=ray_runtime_env, |
1214 | 1225 | placement_group=placement_group, |
1215 | 1226 | distributed_executor_backend=self.distributed_executor_backend, |
1216 | 1227 | worker_cls=self.worker_cls, |
|
0 commit comments