[Perf] Cache vllm.env.__getattr__ result to avoid recomputation (vllm-project#26146)

Jialin · xuebwang-amd · commit ba488a6b0b4b · 2025-10-24T09:30:37.000Z
Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
Signed-off-by: xuebwang-amd <xuebwang@amd.com>
diff --git a/tests/test_envs.py b/tests/test_envs.py
@@ -6,7 +6,54 @@
 
 import pytest
 
-from vllm.envs import env_list_with_choices, env_with_choices
+import vllm.envs as envs
+from vllm.envs import (
+    enable_envs_cache,
+    env_list_with_choices,
+    env_with_choices,
+    environment_variables,
+)
+
+
+def test_getattr_without_cache(monkeypatch: pytest.MonkeyPatch):
+    assert envs.VLLM_HOST_IP == ""
+    assert envs.VLLM_PORT is None
+    monkeypatch.setenv("VLLM_HOST_IP", "1.1.1.1")
+    monkeypatch.setenv("VLLM_PORT", "1234")
+    assert envs.VLLM_HOST_IP == "1.1.1.1"
+    assert envs.VLLM_PORT == 1234
+    # __getattr__ is not decorated with functools.cache
+    assert not hasattr(envs.__getattr__, "cache_info")
+
+
+def test_getattr_with_cache(monkeypatch: pytest.MonkeyPatch):
+    monkeypatch.setenv("VLLM_HOST_IP", "1.1.1.1")
+    monkeypatch.setenv("VLLM_PORT", "1234")
+    # __getattr__ is not decorated with functools.cache
+    assert not hasattr(envs.__getattr__, "cache_info")
+
+    # Enable envs cache and ignore ongoing environment changes
+    enable_envs_cache()
+
+    # __getattr__ is now decorated with functools.cache
+    assert hasattr(envs.__getattr__, "cache_info")
+    start_hits = envs.__getattr__.cache_info().hits
+
+    # 2 more hits due to VLLM_HOST_IP and VLLM_PORT accesses
+    assert envs.VLLM_HOST_IP == "1.1.1.1"
+    assert envs.VLLM_PORT == 1234
+    assert envs.__getattr__.cache_info().hits == start_hits + 2
+
+    # All environment variables are cached
+    for environment_variable in environment_variables:
+        envs.__getattr__(environment_variable)
+    assert envs.__getattr__.cache_info().hits == start_hits + 2 + len(
+        environment_variables
+    )
+
+    # Reset envs.__getattr__ back to the non-cached version to
+    # avoid affecting other tests
+    envs.__getattr__ = envs.__getattr__.__wrapped__
 
 
 class TestEnvWithChoices:
diff --git a/vllm/envs.py b/vllm/envs.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import functools
 import hashlib
 import json
 import os
@@ -1408,12 +1409,36 @@ def get_vllm_port() -> int | None:
 
 
 def __getattr__(name: str):
-    # lazy evaluation of environment variables
+    """
+    Gets environment variables lazily.
+
+    NOTE: After enable_envs_cache() is invoked (which happens after service
+    initialization), all environment variables will be cached.
+    """
     if name in environment_variables:
         return environment_variables[name]()
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 
 
+def enable_envs_cache() -> None:
+    """
+    Enables caching of environment variables. This is useful for performance
+    reasons, as it avoids the need to re-evaluate environment variables on
+    every call.
+
+    NOTE: Currently, it's invoked after service initialization to reduce
+    runtime overhead. This also means that environment variables should NOT
+    be updated after the service is initialized.
+    """
+    # Tag __getattr__ with functools.cache
+    global __getattr__
+    __getattr__ = functools.cache(__getattr__)
+
+    # Cache all environment variables
+    for key in environment_variables:
+        __getattr__(key)
+
+
 def __dir__():
     return list(environment_variables.keys())
 
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
@@ -20,6 +20,7 @@
 from vllm.config import ParallelConfig, VllmConfig
 from vllm.distributed import stateless_destroy_torch_distributed_process_group
 from vllm.distributed.parallel_state import is_global_first_rank
+from vllm.envs import enable_envs_cache
 from vllm.logger import init_logger
 from vllm.logging_utils.dump_input import dump_engine_exception
 from vllm.lora.request import LoRARequest
@@ -601,6 +602,10 @@ def __init__(
         # If enable, attach GC debugger after static variable freeze.
         maybe_attach_gc_debug_callback()
 
+        # Enable environment variable cache (i.e. assume no more
+        # environment variable overrides after this point)
+        enable_envs_cache()
+
     @contextmanager
     def _perform_handshakes(
         self,
diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py
@@ -33,6 +33,7 @@
     get_pp_group,
     get_tp_group,
 )
+from vllm.envs import enable_envs_cache
 from vllm.logger import init_logger
 from vllm.utils import (
     _maybe_force_spawn,
@@ -455,6 +456,10 @@ def __init__(
         # Load model
         self.worker.load_model()
 
+        # Enable environment variable cache (i.e. assume no more
+        # environment variable overrides after this point)
+        enable_envs_cache()
+
     @staticmethod
     def make_worker_process(
         vllm_config: VllmConfig,