Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/epp/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ var (
certPath = flag.String("cert-path", runserver.DefaultCertPath, "The path to the certificate for secure serving. The certificate and private key files "+
"are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, "+
"then a self-signed certificate is used.")
enableCertReload = flag.Bool("enable-cert-reload", runserver.DefaultCertReload, "Enables certificate reloading of the certificates specified in --cert-path")
// metric flags
totalQueuedRequestsMetric = flag.String("total-queued-requests-metric", runserver.DefaultTotalQueuedRequestsMetric, "Prometheus metric for the number of queued requests.")
kvCacheUsagePercentageMetric = flag.String("kv-cache-usage-percentage-metric", runserver.DefaultKvCacheUsagePercentageMetric, "Prometheus metric for the fraction of KV-cache blocks currently in use (from 0 to 1).")
Expand Down Expand Up @@ -345,6 +346,7 @@ func (r *Runner) Run(ctx context.Context) error {
SecureServing: *secureServing,
HealthChecking: *healthChecking,
CertPath: *certPath,
EnableCertReload: *enableCertReload,
RefreshPrometheusMetricsInterval: *refreshPrometheusMetricsInterval,
MetricsStalenessThreshold: *metricsStalenessThreshold,
Director: director,
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
github.com/cespare/xxhash/v2 v2.3.0
github.com/elastic/crd-ref-docs v0.2.0
github.com/envoyproxy/go-control-plane/envoy v1.35.0
github.com/fsnotify/fsnotify v1.9.0
github.com/go-logr/logr v1.4.3
github.com/google/go-cmp v0.7.0
github.com/google/uuid v1.6.0
Expand Down Expand Up @@ -58,7 +59,6 @@ require (
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
github.com/evanphx/json-patch/v5 v5.9.11 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.9.0 // indirect
github.com/fxamacker/cbor/v2 v2.9.0 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-logr/zapr v1.3.0 // indirect
Expand Down
103 changes: 103 additions & 0 deletions pkg/common/certs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package common

import (
	"context"
	"crypto/tls"
	"fmt"
	"path/filepath"
	"sync/atomic"
	"time"

	"github.com/fsnotify/fsnotify"
	"sigs.k8s.io/controller-runtime/pkg/log"

	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

// debounceDelay is how long the cert watcher waits after the most recent
// write/create event before reloading, so that a burst of file system events
// results in a single reload once the events have settled.
const debounceDelay = 250 * time.Millisecond

// CertReloader exposes the TLS certificate most recently loaded by the
// watcher started in NewCertReloader. The certificate is kept behind an
// atomic pointer: the watcher replaces it with Store and Get reads it with
// Load.
type CertReloader struct {
cert *atomic.Pointer[tls.Certificate]
}

// NewCertReloader starts watching the directory at path and returns a
// CertReloader whose Get method yields the most recently loaded certificate.
//
// init is stored as the current certificate until the first successful
// reload. Whenever a write or create event is observed under path, a reload
// of path/tls.crt and path/tls.key is scheduled debounceDelay later; further
// events arriving in the meantime restart that delay. A failed reload is
// logged and the previously stored certificate is kept.
//
// The background goroutine stops, closing the watcher and cancelling any
// reload that is still pending, when ctx is done or when the watcher's
// channels are closed.
//
// An error is returned if the watcher cannot be created or path cannot be
// added to it; in the latter case the watcher is closed before returning.
func NewCertReloader(ctx context.Context, path string, init *tls.Certificate) (*CertReloader, error) {
	certPtr := &atomic.Pointer[tls.Certificate]{}
	certPtr.Store(init)

	w, err := fsnotify.NewWatcher()
	if err != nil {
		return nil, fmt.Errorf("failed to create cert watcher: %w", err)
	}

	logger := log.FromContext(ctx).
		WithName("cert-reloader").
		WithValues("path", path)
	traceLogger := logger.V(logutil.TRACE)

	if err := w.Add(path); err != nil {
		_ = w.Close() // Clean up watcher before returning
		return nil, fmt.Errorf("failed to watch %q: %w", path, err)
	}

	// Build the key pair locations once; filepath.Join also copes with a
	// trailing separator in path.
	certFile := filepath.Join(path, "tls.crt")
	keyFile := filepath.Join(path, "tls.key")

	// reload runs on the timer's goroutine once events have settled.
	reload := func() {
		cert, err := tls.LoadX509KeyPair(certFile, keyFile)
		if err != nil {
			logger.Error(err, "Failed to reload TLS certificate")
			return
		}
		certPtr.Store(&cert)
		traceLogger.Info("Reloaded TLS certificate")
	}

	go func() {
		defer w.Close()

		var debounceTimer *time.Timer
		// Best effort: do not let a scheduled reload fire after the watcher
		// goroutine has exited. A reload that is already running is not
		// interrupted.
		defer func() {
			if debounceTimer != nil {
				debounceTimer.Stop()
			}
		}()

		for {
			select {
			case ev, ok := <-w.Events:
				if !ok {
					// Channel closed: the watcher is gone, nothing more to do.
					return
				}
				traceLogger.Info("Cert changed", "event", ev)

				if ev.Op&(fsnotify.Write|fsnotify.Create) == 0 {
					continue
				}

				// Debounce: restart the delay if we get another event.
				if debounceTimer != nil {
					debounceTimer.Stop()
				}
				debounceTimer = time.AfterFunc(debounceDelay, reload)

			case err, ok := <-w.Errors:
				if !ok {
					// Channel closed: the watcher is gone, nothing more to do.
					return
				}
				logger.Error(err, "cert watcher failed")

			case <-ctx.Done():
				return
			}
		}
	}()

	return &CertReloader{cert: certPtr}, nil
}

// Get returns the certificate currently held by the reloader, read
// atomically from the underlying pointer.
func (r *CertReloader) Get() *tls.Certificate {
	current := r.cert.Load()
	return current
}
Loading