11"""vLLM: a high-throughput and memory-efficient inference engine for LLMs""" 
2+ import  os 
3+ 
4+ import  torch 
25
36from  vllm .engine .arg_utils  import  AsyncEngineArgs , EngineArgs 
47from  vllm .engine .async_llm_engine  import  AsyncLLMEngine 
1720
1821from  .version  import  __version__ , __version_tuple__ 
1922
23+ # set some common config/environment variables that should be set 
24+ # for all processes created by vllm and all processes 
25+ # that interact with vllm workers. 
26+ # they are executed whenever `import vllm` is called. 
2027
21- def  configure_as_vllm_process ():
22-     """ 
23-     set some common config/environment variables that should be set 
24-     for all processes created by vllm and all processes 
25-     that interact with vllm workers. 
26-     """ 
27-     import  os 
28- 
29-     import  torch 
30- 
31-     # see https://github.com/NVIDIA/nccl/issues/1234 
32-     os .environ ['NCCL_CUMEM_ENABLE' ] =  '0' 
33- 
34-     # see https://github.com/vllm-project/vllm/issues/10480 
35-     os .environ ['TORCHINDUCTOR_COMPILE_THREADS' ] =  '1' 
36-     # see https://github.com/vllm-project/vllm/issues/10619 
37-     torch ._inductor .config .compile_threads  =  1 
38- 
39-     from  vllm .platforms  import  current_platform 
40- 
41-     if  current_platform .is_xpu ():
42-         # see https://github.com/pytorch/pytorch/blob/43c5f59/torch/_dynamo/config.py#L158 
43-         torch ._dynamo .config .disable  =  True 
44-     elif  current_platform .is_hpu ():
45-         # NOTE(kzawora): PT HPU lazy backend (PT_HPU_LAZY_MODE = 1) 
46-         # does not support torch.compile 
47-         # Eager backend (PT_HPU_LAZY_MODE = 0) must be selected for 
48-         # torch.compile support 
49-         is_lazy  =  os .environ .get ('PT_HPU_LAZY_MODE' , '1' ) ==  '1' 
50-         if  is_lazy :
51-             torch ._dynamo .config .disable  =  True 
52-             # NOTE(kzawora) multi-HPU inference with HPUGraphs (lazy-only) 
53-             # requires enabling lazy collectives 
54-             # see https://docs.habana.ai/en/latest/PyTorch/Inference_on_PyTorch/Inference_Using_HPU_Graphs.html # noqa: E501 
55-             os .environ ['PT_HPU_ENABLE_LAZY_COLLECTIVES' ] =  'true' 
28+ # see https://github.com/NVIDIA/nccl/issues/1234 
29+ os .environ ['NCCL_CUMEM_ENABLE' ] =  '0' 
5630
31+ # see https://github.com/vllm-project/vllm/issues/10480 
32+ os .environ ['TORCHINDUCTOR_COMPILE_THREADS' ] =  '1' 
33+ # see https://github.com/vllm-project/vllm/issues/10619 
34+ torch ._inductor .config .compile_threads  =  1 
5735
5836__all__  =  [
5937    "__version__" ,
@@ -80,5 +58,4 @@ def configure_as_vllm_process():
     "AsyncEngineArgs",
     "initialize_ray_cluster",
     "PoolingParams",
-    "configure_as_vllm_process",
 ]
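
For reference, a minimal usage sketch (not part of this diff, and assuming nothing else in the process overrides these variables): after this change the settings above take effect as a side effect of importing vllm, so callers no longer invoke the removed configure_as_vllm_process().

import os

import vllm  # the module-level configuration above runs on import

# Assumption: no other code has overridden these variables in this process.
assert os.environ['NCCL_CUMEM_ENABLE'] == '0'
assert os.environ['TORCHINDUCTOR_COMPILE_THREADS'] == '1'

A consequence of the removal is that downstream code still calling vllm.configure_as_vllm_process() would now raise AttributeError, which is also why the name is dropped from __all__.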