Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 37 additions & 16 deletions cmd/epp/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,12 @@ var (
"The gRPC port used for communicating with Envoy proxy")
grpcHealthPort = flag.Int(
"grpcHealthPort",
9003,
runserver.DefaultGrpcHealthPort,
"The port used for gRPC liveness and readiness probes")
metricsPort = flag.Int(
"metricsPort", 9090, "The metrics port")
"metricsPort",
runserver.DefaultMetricsPort,
"The metrics port")
destinationEndpointHintKey = flag.String(
"destinationEndpointHintKey",
runserver.DefaultDestinationEndpointHintKey,
Expand All @@ -93,28 +95,47 @@ var (
"refreshPrometheusMetricsInterval",
runserver.DefaultRefreshPrometheusMetricsInterval,
"interval to flush prometheus metrics")
logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity")
logVerbosity = flag.Int(
"v",
logging.DEFAULT,
"number for the log level verbosity")
secureServing = flag.Bool(
"secureServing", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.")
healthChecking = flag.Bool("healthChecking", runserver.DefaultHealthChecking, "Enables health checking")
certPath = flag.String(
"certPath", "", "The path to the certificate for secure serving. The certificate and private key files "+
"secureServing",
runserver.DefaultSecureServing,
"Enables secure serving. Defaults to true.")
healthChecking = flag.Bool(
"healthChecking",
runserver.DefaultHealthChecking,
"Enables health checking")
certPath = flag.String(
"certPath",
runserver.DefaultCertPath,
"The path to the certificate for secure serving. The certificate and private key files "+
"are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, "+
"then a self-signed certificate is used.")
// metric flags
totalQueuedRequestsMetric = flag.String("totalQueuedRequestsMetric",
"vllm:num_requests_waiting",
totalQueuedRequestsMetric = flag.String(
"totalQueuedRequestsMetric",
runserver.DefaultTotalQueuedRequestsMetric,
"Prometheus metric for the number of queued requests.")
kvCacheUsagePercentageMetric = flag.String("kvCacheUsagePercentageMetric",
"vllm:gpu_cache_usage_perc",
kvCacheUsagePercentageMetric = flag.String(
"kvCacheUsagePercentageMetric",
runserver.DefaultKvCacheUsagePercentageMetric,
"Prometheus metric for the fraction of KV-cache blocks currently in use (from 0 to 1).")
// LoRA metrics
loraInfoMetric = flag.String("loraInfoMetric",
"vllm:lora_requests_info",
loraInfoMetric = flag.String(
"loraInfoMetric",
runserver.DefaultLoraInfoMetric,
"Prometheus metric for the LoRA info metrics (must be in vLLM label format).")
// configuration flags
configFile = flag.String("configFile", "", "The path to the configuration file")
configText = flag.String("configText", "", "The configuration specified as text, in lieu of a file")
configFile = flag.String(
"configFile",
runserver.DefaultConfigFile,
"The path to the configuration file")
configText = flag.String(
"configText",
runserver.DefaultConfigText,
"The configuration specified as text, in lieu of a file")

setupLog = ctrl.Log.WithName("setup")

Expand Down Expand Up @@ -405,7 +426,7 @@ func validateFlags() error {
return fmt.Errorf("required %q flag not set", "poolName")
}
if *configText != "" && *configFile != "" {
return fmt.Errorf("both the %s and %s flags can not be set at the same time", "configText", "configFile")
return fmt.Errorf("both the %q and %q flags can not be set at the same time", "configText", "configFile")
}

return nil
Expand Down
9 changes: 8 additions & 1 deletion pkg/epp/server/runserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"context"
"crypto/tls"
"fmt"

"time"

extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
Expand Down Expand Up @@ -63,6 +62,8 @@ type ExtProcServerRunner struct {
// Default values for CLI flags in main
const (
DefaultGrpcPort = 9002 // default for --grpcPort
DefaultGrpcHealthPort = 9003 // default for --grpcHealthPort
DefaultMetricsPort = 9090 // default for --metricsPort
DefaultDestinationEndpointHintMetadataNamespace = "envoy.lb" // default for --destinationEndpointHintMetadataNamespace
DefaultDestinationEndpointHintKey = "x-gateway-destination-endpoint" // default for --destinationEndpointHintKey
DefaultPoolName = "" // required but no default
Expand All @@ -71,6 +72,12 @@ const (
DefaultRefreshPrometheusMetricsInterval = 5 * time.Second // default for --refreshPrometheusMetricsInterval
DefaultSecureServing = true // default for --secureServing
DefaultHealthChecking = false // default for --healthChecking
DefaultTotalQueuedRequestsMetric = "vllm:num_requests_waiting" // default for --totalQueuedRequestsMetric
DefaultKvCacheUsagePercentageMetric = "vllm:gpu_cache_usage_perc" // default for --kvCacheUsagePercentageMetric
DefaultLoraInfoMetric = "vllm:lora_requests_info" // default for --loraInfoMetric
DefaultCertPath = "" // default for --certPath
DefaultConfigFile = "" // default for --configFile
DefaultConfigText = "" // default for --configText
)

// NewDefaultExtProcServerRunner creates a runner with default values.
Expand Down