@@ -67,10 +67,12 @@ var (
6767 "The gRPC port used for communicating with Envoy proxy" )
6868 grpcHealthPort = flag .Int (
6969 "grpcHealthPort" ,
70- 9003 ,
70+ runserver . DefaultGrpcHealthPort ,
7171 "The port used for gRPC liveness and readiness probes" )
7272 metricsPort = flag .Int (
73- "metricsPort" , 9090 , "The metrics port" )
73+ "metricsPort" ,
74+ runserver .DefaultMetricsPort ,
75+ "The metrics port" )
7476 destinationEndpointHintKey = flag .String (
7577 "destinationEndpointHintKey" ,
7678 runserver .DefaultDestinationEndpointHintKey ,
@@ -96,31 +98,50 @@ var (
9698 "refreshPrometheusMetricsInterval" ,
9799 runserver .DefaultRefreshPrometheusMetricsInterval ,
98100 "interval to flush prometheus metrics" )
99- logVerbosity = flag .Int ("v" , logging .DEFAULT , "number for the log level verbosity" )
101+ logVerbosity = flag .Int (
102+ "v" ,
103+ logging .DEFAULT ,
104+ "number for the log level verbosity" )
100105 secureServing = flag .Bool (
101- "secureServing" , runserver .DefaultSecureServing , "Enables secure serving. Defaults to true." )
102- healthChecking = flag .Bool ("healthChecking" , runserver .DefaultHealthChecking , "Enables health checking" )
103- certPath = flag .String (
104- "certPath" , "" , "The path to the certificate for secure serving. The certificate and private key files " +
106+ "secureServing" ,
107+ runserver .DefaultSecureServing ,
108+ "Enables secure serving. Defaults to true." )
109+ healthChecking = flag .Bool (
110+ "healthChecking" ,
111+ runserver .DefaultHealthChecking ,
112+ "Enables health checking" )
113+ certPath = flag .String (
114+ "certPath" ,
115+ runserver .DefaultCertPath ,
116+ "The path to the certificate for secure serving. The certificate and private key files " +
105117 "are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, " +
106118 "then a self-signed certificate is used." )
107119 // metric flags
108- totalQueuedRequestsMetric = flag .String ("totalQueuedRequestsMetric" ,
109- "vllm:num_requests_waiting" ,
120+ totalQueuedRequestsMetric = flag .String (
121+ "totalQueuedRequestsMetric" ,
122+ runserver .DefaultTotalQueuedRequestsMetric ,
110123 "Prometheus metric for the number of queued requests." )
111124 totalRunningRequestsMetric = flag .String ("totalRunningRequestsMetric" ,
112125 "vllm:num_requests_running" ,
113126 "Prometheus metric for the number of running requests. This is the same as totalQueuedRequestsMetric, but for running requests." )
114- kvCacheUsagePercentageMetric = flag .String ("kvCacheUsagePercentageMetric" ,
115- "vllm:gpu_cache_usage_perc" ,
127+ kvCacheUsagePercentageMetric = flag .String (
128+ "kvCacheUsagePercentageMetric" ,
129+ runserver .DefaultKvCacheUsagePercentageMetric ,
116130 "Prometheus metric for the fraction of KV-cache blocks currently in use (from 0 to 1)." )
117131 // LoRA metrics
118- loraInfoMetric = flag .String ("loraInfoMetric" ,
119- "vllm:lora_requests_info" ,
132+ loraInfoMetric = flag .String (
133+ "loraInfoMetric" ,
134+ runserver .DefaultLoraInfoMetric ,
120135 "Prometheus metric for the LoRA info metrics (must be in vLLM label format)." )
121136 // configuration flags
122- configFile = flag .String ("configFile" , "" , "The path to the configuration file" )
123- configText = flag .String ("configText" , "" , "The configuration specified as text, in lieu of a file" )
137+ configFile = flag .String (
138+ "configFile" ,
139+ runserver .DefaultConfigFile ,
140+ "The path to the configuration file" )
141+ configText = flag .String (
142+ "configText" ,
143+ runserver .DefaultConfigText ,
144+ "The configuration specified as text, in lieu of a file" )
124145
125146 // Latency Predictor Flag
126147 enableLatencyPredictor = flag .Bool ("enable-latency-predictor" , false , "Enable the regression-based latency predictor and scheduler scorer." )
@@ -434,7 +455,7 @@ func validateFlags() error {
434455 return fmt .Errorf ("required %q flag not set" , "poolName" )
435456 }
436457 if * configText != "" && * configFile != "" {
437- return fmt .Errorf ("both the %s and %s flags can not be set at the same time" , "configText" , "configFile" )
458+ return fmt .Errorf ("both the %q and %q flags can not be set at the same time" , "configText" , "configFile" )
438459 }
439460
440461 return nil
0 commit comments