@@ -57,8 +57,8 @@ func TestPrefixPlugin(t *testing.T) {
57
57
assert .NoError (t , err )
58
58
t .Logf ("Hashes %+v, cached servers: %+v" , state .PrefixHashes , state .PrefixCacheServers )
59
59
// Input size is 6, hash block size is 4, the last 2 characters are ignored.
60
- // Total hashes = 2 (the first one is for the model)
61
- assert .Equal (t , 2 , len (state .PrefixHashes ), "number of hashes is incorrect" )
60
+ // Total hashes = 1 (the first one is for the prefix with model)
61
+ assert .Equal (t , 1 , len (state .PrefixHashes ), "number of hashes is incorrect" )
62
62
assert .Equal (t , 0 , len (state .PrefixCacheServers ), "there shouldn't be any cached servers" )
63
63
assert .Equal (t , float64 (0 ), scores [pod1 ], "score for pod1" )
64
64
assert .Equal (t , float64 (0 ), scores [pod2 ], "score for pod2" )
@@ -84,8 +84,8 @@ func TestPrefixPlugin(t *testing.T) {
84
84
assert .NoError (t , err )
85
85
t .Logf ("Hashes %+v, cached servers: %+v" , state .PrefixHashes , state .PrefixCacheServers )
86
86
// Input size is 6, hash block size is 4, the last 2 characters are ignored.
87
- // Total hashes = 2 (the first one is for the model)
88
- assert .Equal (t , 2 , len (state .PrefixHashes ), "number of hashes is incorrect" )
87
+ // Total hashes = 1 (the first one is for the prefix with model)
88
+ assert .Equal (t , 1 , len (state .PrefixHashes ), "number of hashes is incorrect" )
89
89
assert .Equal (t , 0 , len (state .PrefixCacheServers ), "there shouldn't be any cached servers" )
90
90
assert .Equal (t , float64 (0 ), scores [pod1 ], "score for pod1" )
91
91
assert .Equal (t , float64 (0 ), scores [pod2 ], "score for pod2" )
@@ -110,10 +110,10 @@ func TestPrefixPlugin(t *testing.T) {
110
110
assert .NoError (t , err )
111
111
t .Logf ("Hashes %+v, cached servers: %+v" , state .PrefixHashes , state .PrefixCacheServers )
112
112
// Input size is 8, hash block size is 4, so 2 hashes will be calculated.
113
- // Total hashes = 3 (the first one is for the model)
114
- assert .Equal (t , 3 , len (state .PrefixHashes ), "number of hashes is incorrect" )
113
+ // Total hashes = 2 (the first one is for the prefix with model)
114
+ assert .Equal (t , 2 , len (state .PrefixHashes ), "number of hashes is incorrect" )
115
115
assert .Equal (t , 1 , len (state .PrefixCacheServers ), "pod1 should have cached the aaaa prefix" )
116
- assert .Equal (t , float64 ( 2 ) / float64 ( 3 ) , scores [pod1 ], "score should be 2/3 - the model and the first prefix block match" )
116
+ assert .Equal (t , 0.5 , scores [pod1 ], "score should be 0.5 - the model and the first prefix block match" )
117
117
assert .Equal (t , float64 (0 ), scores [pod2 ], "score for pod2" )
118
118
119
119
schedulingResult = & types.SchedulingResult {
@@ -135,8 +135,8 @@ func TestPrefixPlugin(t *testing.T) {
135
135
assert .NoError (t , err )
136
136
t .Logf ("Hashes %+v, cached servers: %+v" , state .PrefixHashes , state .PrefixCacheServers )
137
137
// Input size is 8, hash block size is 4, so 2 hashes will be calculated.
138
- // Total hashes = 3 (the first one is for the model)
139
- assert .Equal (t , 3 , len (state .PrefixHashes ), "number of hashes is incorrect" )
138
+ // Total hashes = 2 (the first one is for the prefix with model)
139
+ assert .Equal (t , 2 , len (state .PrefixHashes ), "number of hashes is incorrect" )
140
140
assert .Equal (t , 0 , len (state .PrefixCacheServers ), "pod1 should have cached the aaaa prefix" )
141
141
assert .Equal (t , float64 (0 ), scores [pod1 ], "score for pod1" )
142
142
assert .Equal (t , float64 (0 ), scores [pod2 ], "score for pod2" )
@@ -160,10 +160,10 @@ func TestPrefixPlugin(t *testing.T) {
160
160
assert .NoError (t , err )
161
161
t .Logf ("Hashes %+v, cached servers: %+v" , state .PrefixHashes , state .PrefixCacheServers )
162
162
// Input size is 12, hash block size is 4, so 3 hashes will be calculated.
163
- // Total hashes = 4 (the first one is for the model)
164
- assert .Equal (t , 4 , len (state .PrefixHashes ), "number of hashes is incorrect" )
163
+ // Total hashes = 3 (the first one is for the prefix with model)
164
+ assert .Equal (t , 3 , len (state .PrefixHashes ), "number of hashes is incorrect" )
165
165
assert .Equal (t , 1 , len (state .PrefixCacheServers ), "pod1 should have cached the aaaa prefix" )
166
- assert .Equal (t , 0.75 , scores [pod1 ], "score should be 0.75 - the model and the first 2 prefix blocks match" )
166
+ assert .Equal (t , 2. / 3 , scores [pod1 ], "score should be 2./3 - the model and the first 2 prefix blocks match" )
167
167
assert .Equal (t , float64 (0 ), scores [pod2 ], "score for pod2" )
168
168
169
169
schedulingResult = & types.SchedulingResult {
@@ -224,7 +224,7 @@ func BenchmarkPrefixPluginStress(b *testing.B) {
224
224
// Second cycle: validate internal state
225
225
state , err := plugins .ReadPluginStateKey [* SchedulingContextState ](plugin .pluginState , req .RequestId , PrefixCachePluginType )
226
226
assert .NoError (b , err )
227
- expectedHashes := int (math .Min (float64 (maxPrefixBlocks + 1 ), float64 (len (req .Prompt )/ blockSize + 1 ))) // the extra one is for the model.
227
+ expectedHashes := int (math .Min (float64 (maxPrefixBlocks ), float64 (len (req .Prompt )/ blockSize )))
228
228
assert .Equal (b , expectedHashes , len (state .PrefixHashes ), "number of hashes is incorrect" )
229
229
}
230
230
}
0 commit comments