@@ -18,6 +18,7 @@ package metrics
1818
1919import (
2020 "context"
21+ "runtime/debug"
2122 "sync"
2223 "time"
2324
@@ -219,6 +220,40 @@ var (
219220 },
220221 []string {"commit" },
221222 )
223+
224+ // Prefix indexer Metrics
225+ PrefixCacheSize = compbasemetrics .NewGaugeVec (
226+ & compbasemetrics.GaugeOpts {
227+ Subsystem : InferenceExtension ,
228+ Name : "prefix_indexer_size" ,
229+ Help : "Size of the prefix indexer." ,
230+ StabilityLevel : compbasemetrics .ALPHA ,
231+ },
232+ []string {},
233+ )
234+
235+ PrefixCacheHitRatio = compbasemetrics .NewHistogramVec (
236+ & compbasemetrics.HistogramOpts {
237+ Subsystem : InferenceExtension ,
238+ Name : "prefix_indexer_hit_ratio" ,
239+ Help : "Ratio of prefix length matched to total prefix length in the cache lookup." ,
240+ // Buckets from 0.0 to 1.0 in increments of 0.1
241+ Buckets : []float64 {0.0 , 0.1 , 0.2 , 0.3 , 0.4 , 0.5 , 0.6 , 0.7 , 0.8 , 0.9 , 1.0 },
242+ StabilityLevel : compbasemetrics .ALPHA ,
243+ },
244+ []string {},
245+ )
246+
247+ PrefixCacheHitLength = compbasemetrics .NewHistogramVec (
248+ & compbasemetrics.HistogramOpts {
249+ Subsystem : InferenceExtension ,
250+ Name : "prefix_indexer_hit_bytes" ,
251+ Help : "Length of the prefix match in number of bytes in the cache lookup." ,
252+ Buckets : []float64 {0 , 16 , 32 , 64 , 128 , 256 , 512 , 1024 , 2048 , 4096 , 8192 , 16384 , 32768 , 65536 },
253+ StabilityLevel : compbasemetrics .ALPHA ,
254+ },
255+ []string {},
256+ )
222257)
223258
224259var registerMetrics sync.Once
@@ -244,6 +279,10 @@ func Register() {
244279 legacyregistry .MustRegister (SchedulerE2ELatency )
245280
246281 legacyregistry .MustRegister (InferenceExtensionInfo )
282+
283+ legacyregistry .MustRegister (PrefixCacheSize )
284+ legacyregistry .MustRegister (PrefixCacheHitRatio )
285+ legacyregistry .MustRegister (PrefixCacheHitLength )
247286 })
248287}
249288
@@ -352,8 +391,44 @@ func RecordSchedulerE2ELatency(duration time.Duration) {
352391 SchedulerE2ELatency .WithLabelValues ().Observe (duration .Seconds ())
353392}
354393
394+ // RecordPrefixCacheSize records the size of the prefix indexer in megabytes.
395+ func RecordPrefixCacheSize (size int64 ) {
396+ PrefixCacheSize .WithLabelValues ().Set (float64 (size ))
397+ }
398+
399+ // RecordPrefixCacheMatch records both the hit ratio and hit length for a prefix indexer match.
400+ // matchedLength is the number of characters that matched, and totalLength is the total prefix length.
401+ func RecordPrefixCacheMatch (matchedLength , totalLength int ) {
402+ // Record the hit length metric
403+ PrefixCacheHitLength .WithLabelValues ().Observe (float64 (matchedLength ))
404+
405+ // Record the hit ratio metric if totalLength is positive
406+ if totalLength > 0 {
407+ ratio := float64 (matchedLength ) / float64 (totalLength )
408+ PrefixCacheHitRatio .WithLabelValues ().Observe (ratio )
409+ }
410+ }
411+
355412func RecordInferenceExtensionInfo () {
356413 if CommitSHA != "" {
357414 InferenceExtensionInfo .WithLabelValues (CommitSHA ).Set (1 )
358415 }
359416}
417+
418+ func init () {
419+ info , ok := debug .ReadBuildInfo ()
420+ if ! ok {
421+ return
422+ }
423+
424+ var Commit = func (i * debug.BuildInfo ) string {
425+ for _ , setting := range i .Settings {
426+ if setting .Key == "vcs.revision" {
427+ return setting .Value
428+ }
429+ }
430+ return ""
431+ }(info )
432+
433+ CommitSHA = Commit
434+ }
0 commit comments