Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions docs/dev.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ const(
DEFAULT=2
VERBOSE=3
DEBUG=4
TRACE=5
)
```

Expand Down Expand Up @@ -54,12 +55,12 @@ The guidelines are written in the context of a k8s controller. Our [ext-proc](..
* `V(DEBUG)` and above: Use your best judgement.

4. Metric scraping loops. These loops run at a very high frequency, and logs can be very spammy if not handled properly.
* `klog.V(DEBUG).InfoS`
* `klog.V(TRACE).InfoS`
* Transient errors/warnings, such as failure to get response from a pod.
* Important state changes, such as updating a metric.

5. Misc
1. Periodic (every 5s) debug loop which prints the current pods and metrics.
* `klog.WarningS` If the metrics are not fresh enough, which indicates an error occurred during the metric scraping loop.
* `klog.V(VERBOSE).InfoS`
* `klog.V(DEBUG).InfoS`
* This is very important to debug the request scheduling algorithm, and yet not spammy compared to the metric scraping loop logs.
8 changes: 4 additions & 4 deletions pkg/ext-proc/backend/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func (p *Provider) Init(refreshPodsInterval, refreshMetricsInterval time.Duratio
for {
time.Sleep(refreshMetricsInterval)
if err := p.refreshMetricsOnce(); err != nil {
klog.V(logutil.DEBUG).Infof("Failed to refresh metrics: %v", err)
klog.V(logutil.TRACE).Infof("Failed to refresh metrics: %v", err)
}
}
}()
Expand Down Expand Up @@ -135,12 +135,12 @@ func (p *Provider) refreshMetricsOnce() error {
defer func() {
d := time.Since(start)
// TODO: add a metric instead of logging
klog.V(logutil.DEBUG).Infof("Refreshed metrics in %v", d)
klog.V(logutil.TRACE).Infof("Refreshed metrics in %v", d)
}()
var wg sync.WaitGroup
errCh := make(chan error)
processOnePod := func(key, value any) bool {
klog.V(logutil.DEBUG).Infof("Processing pod %v and metric %v", key, value)
klog.V(logutil.TRACE).Infof("Processing pod %v and metric %v", key, value)
pod := key.(Pod)
existing := value.(*PodMetrics)
wg.Add(1)
Expand All @@ -152,7 +152,7 @@ func (p *Provider) refreshMetricsOnce() error {
return
}
p.UpdatePodMetrics(pod, updated)
klog.V(logutil.DEBUG).Infof("Updated metrics for pod %s: %v", pod, updated.Metrics)
klog.V(logutil.TRACE).Infof("Updated metrics for pod %s: %v", pod, updated.Metrics)
}()
return true
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/ext-proc/backend/vllm/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,6 @@ func getLatestMetric(metricFamilies map[string]*dto.MetricFamily, metricName str
latest = m
}
}
klog.V(logutil.DEBUG).Infof("Got metric value %+v for metric %v", latest, metricName)
klog.V(logutil.TRACE).Infof("Got metric value %+v for metric %v", latest, metricName)
return latest, nil
}
1 change: 1 addition & 0 deletions pkg/ext-proc/util/logging/logging_const.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ const (
DEFAULT = 2
VERBOSE = 3
DEBUG = 4
TRACE = 5
)