@@ -487,108 +487,7 @@ func (c *MilvusCache) addEntry(id string, requestID string, model string, query
487487
488488// FindSimilar searches for semantically similar cached requests for the given model and query, applying the cache's own similarityThreshold (c.similarityThreshold) as the match cutoff.
489489func (c * MilvusCache ) FindSimilar (model string , query string ) ([]byte , bool , error ) {
490- start := time .Now ()
491-
492- if ! c .enabled {
493- observability .Debugf ("MilvusCache.FindSimilar: cache disabled" )
494- return nil , false , nil
495- }
496- queryPreview := query
497- if len (query ) > 50 {
498- queryPreview = query [:50 ] + "..."
499- }
500- observability .Debugf ("MilvusCache.FindSimilar: searching for model='%s', query='%s' (len=%d chars)" ,
501- model , queryPreview , len (query ))
502-
503- // Generate semantic embedding for similarity comparison
504- queryEmbedding , err := candle_binding .GetEmbedding (query , 0 ) // Auto-detect dimension
505- if err != nil {
506- metrics .RecordCacheOperation ("milvus" , "find_similar" , "error" , time .Since (start ).Seconds ())
507- return nil , false , fmt .Errorf ("failed to generate embedding: %w" , err )
508- }
509-
510- ctx := context .Background ()
511-
512- // Define search parameters
513- searchParam , err := entity .NewIndexHNSWSearchParam (c .config .Search .Params .Ef )
514- if err != nil {
515- return nil , false , fmt .Errorf ("failed to create search parameters: %w" , err )
516- }
517-
518- // Use Milvus Search for efficient similarity search
519- searchResult , err := c .client .Search (
520- ctx ,
521- c .collectionName ,
522- []string {},
523- fmt .Sprintf ("model == \" %s\" && response_body != \" \" " , model ),
524- []string {"response_body" },
525- []entity.Vector {entity .FloatVector (queryEmbedding )},
526- c .config .Collection .VectorField .Name ,
527- entity .MetricType (c .config .Collection .VectorField .MetricType ),
528- c .config .Search .TopK ,
529- searchParam ,
530- )
531- if err != nil {
532- observability .Debugf ("MilvusCache.FindSimilar: search failed: %v" , err )
533- atomic .AddInt64 (& c .missCount , 1 )
534- metrics .RecordCacheOperation ("milvus" , "find_similar" , "error" , time .Since (start ).Seconds ())
535- metrics .RecordCacheMiss ()
536- return nil , false , nil
537- }
538-
539- if len (searchResult ) == 0 || searchResult [0 ].ResultCount == 0 {
540- atomic .AddInt64 (& c .missCount , 1 )
541- observability .Debugf ("MilvusCache.FindSimilar: no entries found" )
542- metrics .RecordCacheOperation ("milvus" , "find_similar" , "miss" , time .Since (start ).Seconds ())
543- metrics .RecordCacheMiss ()
544- return nil , false , nil
545- }
546-
547- bestScore := searchResult [0 ].Scores [0 ]
548- if bestScore < c .similarityThreshold {
549- atomic .AddInt64 (& c .missCount , 1 )
550- observability .Debugf ("MilvusCache.FindSimilar: CACHE MISS - best_similarity=%.4f < threshold=%.4f" ,
551- bestScore , c .similarityThreshold )
552- observability .LogEvent ("cache_miss" , map [string ]interface {}{
553- "backend" : "milvus" ,
554- "best_similarity" : bestScore ,
555- "threshold" : c .similarityThreshold ,
556- "model" : model ,
557- "collection" : c .collectionName ,
558- })
559- metrics .RecordCacheOperation ("milvus" , "find_similar" , "miss" , time .Since (start ).Seconds ())
560- metrics .RecordCacheMiss ()
561- return nil , false , nil
562- }
563-
564- // Cache Hit
565- var responseBody []byte
566- responseBodyColumn , ok := searchResult [0 ].Fields [0 ].(* entity.ColumnVarChar )
567- if ok && responseBodyColumn .Len () > 0 {
568- responseBody = []byte (responseBodyColumn .Data ()[0 ])
569- }
570-
571- if responseBody == nil {
572- observability .Debugf ("MilvusCache.FindSimilar: cache hit but response_body is missing or not a string" )
573- atomic .AddInt64 (& c .missCount , 1 )
574- metrics .RecordCacheOperation ("milvus" , "find_similar" , "error" , time .Since (start ).Seconds ())
575- metrics .RecordCacheMiss ()
576- return nil , false , nil
577- }
578-
579- atomic .AddInt64 (& c .hitCount , 1 )
580- observability .Debugf ("MilvusCache.FindSimilar: CACHE HIT - similarity=%.4f >= threshold=%.4f, response_size=%d bytes" ,
581- bestScore , c .similarityThreshold , len (responseBody ))
582- observability .LogEvent ("cache_hit" , map [string ]interface {}{
583- "backend" : "milvus" ,
584- "similarity" : bestScore ,
585- "threshold" : c .similarityThreshold ,
586- "model" : model ,
587- "collection" : c .collectionName ,
588- })
589- metrics .RecordCacheOperation ("milvus" , "find_similar" , "hit" , time .Since (start ).Seconds ())
590- metrics .RecordCacheHit ()
591- return responseBody , true , nil
490+ return c .FindSimilarWithThreshold (model , query , c .similarityThreshold )
592491}
593492
594493// FindSimilarWithThreshold searches for semantically similar cached requests using a specific threshold
0 commit comments