```diff
@@ -42,6 +42,7 @@
 from vllm.entrypoints.launcher import serve_http
 from vllm.entrypoints.logger import RequestLogger
 from vllm.entrypoints.openai.cli_args import (make_arg_parser,
+                                              validate_lora_cache_args,
                                               validate_parsed_serve_args)
 # yapf conflicts with isort for this block
 # yapf: disable
```
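The hunk above imports the new `validate_lora_cache_args` helper from `cli_args`, but its body is not part of this diff. A minimal sketch of what such a validator could look like, assuming it only needs to check that the configured cache directory exists and is usable (the actual implementation in `vllm/entrypoints/openai/cli_args.py` may differ):

```python
# Hypothetical sketch; the real validate_lora_cache_args lives in
# vllm/entrypoints/openai/cli_args.py and may perform different checks.
import argparse
import os


def validate_lora_cache_args(args: argparse.Namespace) -> None:
    """Reject a LoRA cache directory that cannot be used for caching."""
    lora_cache_dir = getattr(args, "lora_cache_dir", None)
    if lora_cache_dir is None:
        return  # feature not enabled; nothing to validate
    if not os.path.isdir(lora_cache_dir):
        raise ValueError(
            f"lora_cache_dir '{lora_cache_dir}' is not an existing directory")
    if not os.access(lora_cache_dir, os.R_OK | os.W_OK):
        raise ValueError(
            f"lora_cache_dir '{lora_cache_dir}' must be readable and writable")
```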
```diff
@@ -951,6 +952,7 @@ async def init_app_state(
         args.response_role,
         request_logger=request_logger,
         chat_template=resolved_chat_template,
+        lora_cache_dir=args.lora_cache_dir,
         chat_template_content_format=args.chat_template_content_format,
         return_tokens_as_token_ids=args.return_tokens_as_token_ids,
         enable_auto_tools=args.enable_auto_tool_choice,
@@ -963,6 +965,7 @@ async def init_app_state(
         engine_client,
         model_config,
         state.openai_serving_models,
+        lora_cache_dir=args.lora_cache_dir,
         request_logger=request_logger,
         return_tokens_as_token_ids=args.return_tokens_as_token_ids,
     ) if model_config.runner_type == "generate" else None
@@ -972,6 +975,7 @@ async def init_app_state(
         state.openai_serving_models,
         request_logger=request_logger,
         chat_template=resolved_chat_template,
+        lora_cache_dir=args.lora_cache_dir,
         chat_template_content_format=args.chat_template_content_format,
     ) if model_config.runner_type == "pooling" else None
     state.openai_serving_embedding = OpenAIServingEmbedding(
@@ -980,24 +984,28 @@ async def init_app_state(
         state.openai_serving_models,
         request_logger=request_logger,
         chat_template=resolved_chat_template,
+        lora_cache_dir=args.lora_cache_dir,
         chat_template_content_format=args.chat_template_content_format,
     ) if model_config.task == "embed" else None
     state.openai_serving_scores = ServingScores(
         engine_client,
         model_config,
         state.openai_serving_models,
+        lora_cache_dir=args.lora_cache_dir,
         request_logger=request_logger) if model_config.task in (
             "score", "embed", "pooling") else None
     state.jinaai_serving_reranking = ServingScores(
         engine_client,
         model_config,
         state.openai_serving_models,
+        lora_cache_dir=args.lora_cache_dir,
         request_logger=request_logger
     ) if model_config.task == "score" else None
     state.openai_serving_tokenization = OpenAIServingTokenization(
         engine_client,
         model_config,
         state.openai_serving_models,
+        lora_cache_dir=args.lora_cache_dir,
         request_logger=request_logger,
         chat_template=resolved_chat_template,
         chat_template_content_format=args.chat_template_content_format,
@@ -1006,6 +1014,7 @@ async def init_app_state(
         engine_client,
         model_config,
         state.openai_serving_models,
+        lora_cache_dir=args.lora_cache_dir,
         request_logger=request_logger,
     ) if model_config.runner_type == "transcription" else None
     state.task = model_config.task
```
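Every serving constructor in `init_app_state` now receives the same `lora_cache_dir=args.lora_cache_dir` keyword, so the cache location is configured once on the CLI and threaded explicitly into each handler rather than read from global state. A simplified sketch of how a serving class might accept and use it (the real classes such as `OpenAIServingChat` and `ServingScores` take many more parameters and share a common base; the stub and its helper below are illustrative only):

```python
# Illustrative stub only: real serving classes accept many more arguments;
# lora_cache_dir is the parameter this diff adds to each of them.
import os
from typing import Optional


class ServingHandlerSketch:

    def __init__(self, *, lora_cache_dir: Optional[str] = None) -> None:
        # Assumed purpose: where dynamically requested LoRA adapters can be
        # downloaded and cached before being registered with the engine.
        self.lora_cache_dir = lora_cache_dir

    def _cached_adapter_path(self, adapter_name: str) -> Optional[str]:
        # Hypothetical helper: resolve an adapter inside the cache dir.
        if self.lora_cache_dir is None:
            return None
        return os.path.join(self.lora_cache_dir, adapter_name)
```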
```diff
@@ -1067,7 +1076,12 @@ def signal_handler(*_) -> None:
     app = build_app(args)
 
     model_config = await engine_client.get_model_config()
-    await init_app_state(engine_client, model_config, app.state, args)
+    await init_app_state(
+        engine_client,
+        model_config,
+        app.state,
+        args,
+    )
 
     def _listen_addr(a: str) -> str:
         if is_valid_ipv6_address(a):
```
```diff
@@ -1113,5 +1127,6 @@ def _listen_addr(a: str) -> str:
     parser = make_arg_parser(parser)
     args = parser.parse_args()
     validate_parsed_serve_args(args)
+    validate_lora_cache_args(args)
 
     uvloop.run(run_server(args))
```
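End to end, the new validator runs right after the existing argument validation and before the server starts, so a bad cache directory fails fast at launch. Assuming `make_arg_parser` registers the option as a `--lora-cache-dir` flag (inferred from the `args.lora_cache_dir` destination, not shown in this diff), a launch script mirroring the `__main__` block would look like:

```python
# Mirrors the __main__ block above; the --lora-cache-dir flag name is an
# assumption inferred from the args.lora_cache_dir attribute in the diff.
import uvloop

from vllm.entrypoints.openai.api_server import run_server
from vllm.entrypoints.openai.cli_args import (make_arg_parser,
                                              validate_lora_cache_args,
                                              validate_parsed_serve_args)
from vllm.utils import FlexibleArgumentParser

if __name__ == "__main__":
    parser = FlexibleArgumentParser(
        description="vLLM OpenAI-compatible RESTful API server.")
    parser = make_arg_parser(parser)
    # e.g. invoked as:
    #   python launch.py --model meta-llama/Llama-2-7b-hf \
    #       --enable-lora --lora-cache-dir /tmp/lora-cache
    args = parser.parse_args()
    validate_parsed_serve_args(args)
    validate_lora_cache_args(args)  # new: fail fast on a bad cache dir

    uvloop.run(run_server(args))
```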