@@ -130,6 +130,40 @@ void lj_str_resize(lua_State *L, MSize newmask)
130130 g -> strhash = newhash ;
131131}
132132
133+ #if LUAJIT_SMART_STRINGS
134+ static LJ_AINLINE uint32_t
135+ lj_fullhash (const uint8_t * v , MSize len )
136+ {
137+ MSize a = 0 , b = 0 ;
138+ MSize c = 0xcafedead ;
139+ MSize d = 0xdeadbeef ;
140+ MSize h = len ;
141+ lua_assert (len >= 12 );
142+ for (; len > 8 ; len -= 8 , v += 8 ) {
143+ a ^= lj_getu32 (v );
144+ b ^= lj_getu32 (v + 4 );
145+ c += a ;
146+ d += b ;
147+ a = lj_rol (a , 5 ) - d ;
148+ b = lj_rol (b , 7 ) - c ;
149+ c = lj_rol (c , 24 ) ^ a ;
150+ d = lj_rol (d , 1 ) ^ b ;
151+ }
152+ a ^= lj_getu32 (v + len - 8 );
153+ b ^= lj_getu32 (v + len - 4 );
154+ c += b ; c -= lj_rol (a , 9 );
155+ d += a ; d -= lj_rol (b , 18 );
156+ h -= lj_rol (a ^b ,7 );
157+ h += c ; h += lj_rol (d ,13 );
158+ d ^= c ; d -= lj_rol (c ,25 );
159+ h ^= d ; h -= lj_rol (d ,16 );
160+ c ^= h ; c -= lj_rol (h ,4 );
161+ d ^= c ; d -= lj_rol (c ,14 );
162+ h ^= d ; h -= lj_rol (d ,24 );
163+ return h ;
164+ }
165+ #endif
166+
133167/* Intern a string and return string object. */
134168GCstr * lj_str_new (lua_State * L , const char * str , size_t lenx )
135169{
@@ -138,6 +172,10 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
138172 GCobj * o ;
139173 MSize len = (MSize )lenx ;
140174 MSize a , b , h = len ;
175+ uint8_t strflags = 0 ;
176+ #if LUAJIT_SMART_STRINGS
177+ unsigned collisions = 0 ;
178+ #endif
141179 if (lenx >= LJ_MAX_STR )
142180 lj_err_msg (L , LJ_ERR_STROV );
143181 g = G (L );
@@ -161,34 +199,123 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
161199 h ^= b ; h -= lj_rol (b , 16 );
162200 /* Check if the string has already been interned. */
163201 o = gcref (g -> strhash [h & g -> strmask ]);
202+ #if LUAJIT_SMART_STRINGS
203+ /*
204+ ** The default "fast" string hash function samples only a few positions
205+ ** in a string, the remaining bytes don't affect the function's result.
206+ ** The function performs well for short strings; however long strings
207+ ** can yield extremely high collision rates.
208+ **
209+ ** An adaptive scheme was implemented. Two hash functions are used
210+ ** simultaneously. A bucket is picked based on the output of the fast
211+ ** hash function. If an item is to be inserted in a collision chain
212+ ** longer than a certain threshold, another bucket is picked based on
213+ ** the stronger hash function. Since two hash functions are used
214+ ** simultaneously, insert should consider two buckets. The second bucket
215+ ** is often NOT considered thanks to the bloom filter. The filter is
216+ ** rebuilt during GC cycle.
217+ **
218+ ** Parameters below were tuned on a set of benchmarks. Max_collisions is
219+ ** also backed by theory: the expected maximum length of a collision
220+ ** chain in a hash table with the fill factor of 1.0 is
221+ ** O(log(N)/log(log(N))), assuming uniformly distributed random keys.
222+ ** The upper bound for N=65,000 is 10, hence 40 is a clear indication of
223+ ** an anomaly.
224+ **/
225+ #define max_collisions 40
226+ #define inc_collision_soft () (collisions++)
227+ /* If different strings yield the same hash sum, grow counter faster. */
228+ #define inc_collision_hard () (collisions+=1+(len>>4), 1)
229+ #else
230+ #define inc_collision_hard () (1)
231+ #define inc_collision_soft ()
232+ #endif
164233 if (LJ_LIKELY ((((uintptr_t )str + len - 1 ) & (LJ_PAGESIZE - 1 )) <= LJ_PAGESIZE - 4 )) {
165234 while (o != NULL ) {
166235 GCstr * sx = gco2str (o );
167- if (sx -> len == len && str_fastcmp (str , strdata (sx ), len ) == 0 ) {
236+ if (sx -> hash == h && sx -> len == len && inc_collision_hard () &&
237+ str_fastcmp (str , strdata (sx ), len ) == 0 ) {
168238 /* Resurrect if dead. Can only happen with fixstring() (keywords). */
169239 if (isdead (g , o )) flipwhite (o );
170240 return sx ; /* Return existing string. */
171241 }
172242 o = gcnext (o );
243+ inc_collision_soft ();
173244 }
174245 } else { /* Slow path: end of string is too close to a page boundary. */
175246 while (o != NULL ) {
176247 GCstr * sx = gco2str (o );
177- if (sx -> len == len && memcmp (str , strdata (sx ), len ) == 0 ) {
248+ if (sx -> hash == h && sx -> len == len && inc_collision_hard () &&
249+ memcmp (str , strdata (sx ), len ) == 0 ) {
178250 /* Resurrect if dead. Can only happen with fixstring() (keywords). */
179251 if (isdead (g , o )) flipwhite (o );
180252 return sx ; /* Return existing string. */
181253 }
182254 o = gcnext (o );
255+ inc_collision_soft ();
183256 }
184257 }
258+ #if LUAJIT_SMART_STRINGS
259+ /* "Fast" hash function consumes all bytes of a string <= 12 bytes. */
260+ if (len > 12 ) {
261+ /*
262+ ** The bloom filter is keyed with the high 12 bits of the fast
263+ ** hash sum. The filter is rebuilt during GC cycle. It's beneficial
264+ ** to have these bits readily available and avoid hash sum
265+ ** recalculation during GC. High 6 bits are included in the "full"
266+ ** hash sum, and bits 20-25 are stored in s->strflags.
267+ **/
268+ int search_fullh =
269+ bloomtest (g -> strbloom .cur [0 ], h >>(sizeof (h )* 8 - 6 )) != 0 &&
270+ bloomtest (g -> strbloom .cur [1 ], h >>(sizeof (h )* 8 - 12 )) != 0 ;
271+ if (LJ_UNLIKELY (search_fullh || collisions > max_collisions )) {
272+ MSize fh = lj_fullhash ((const uint8_t * )str , len );
273+ #define high6mask ((~(MSize)0)<<(sizeof(MSize)*8-6))
274+ fh = (fh >> 6 ) | (h & high6mask );
275+ if (search_fullh ) {
276+ /* Recheck if the string has already been interned with "harder" hash. */
277+ o = gcref (g -> strhash [fh & g -> strmask ]);
278+ if (LJ_LIKELY ((((uintptr_t )str + len - 1 ) & (LJ_PAGESIZE - 1 )) <= LJ_PAGESIZE - 4 )) {
279+ while (o != NULL ) {
280+ GCstr * sx = gco2str (o );
281+ if (sx -> hash == fh && sx -> len == len && str_fastcmp (str , strdata (sx ), len ) == 0 ) {
282+ /* Resurrect if dead. Can only happen with fixstring() (keywords). */
283+ if (isdead (g , o )) flipwhite (o );
284+ return sx ; /* Return existing string. */
285+ }
286+ o = gcnext (o );
287+ }
288+ } else { /* Slow path: end of string is too close to a page boundary. */
289+ while (o != NULL ) {
290+ GCstr * sx = gco2str (o );
291+ if (sx -> hash == fh && sx -> len == len && memcmp (str , strdata (sx ), len ) == 0 ) {
292+ /* Resurrect if dead. Can only happen with fixstring() (keywords). */
293+ if (isdead (g , o )) flipwhite (o );
294+ return sx ; /* Return existing string. */
295+ }
296+ o = gcnext (o );
297+ }
298+ }
299+ }
300+ if (collisions > max_collisions ) {
301+ strflags = 0xc0 | ((h >>(sizeof (h )* 8 - 12 ))& 0x3f );
302+ bloomset (g -> strbloom .cur [0 ], h >>(sizeof (h )* 8 - 6 ));
303+ bloomset (g -> strbloom .cur [1 ], h >>(sizeof (h )* 8 - 12 ));
304+ bloomset (g -> strbloom .new [0 ], h >>(sizeof (h )* 8 - 6 ));
305+ bloomset (g -> strbloom .new [1 ], h >>(sizeof (h )* 8 - 12 ));
306+ h = fh ;
307+ }
308+ }
309+ }
310+ #endif
185311 /* Nope, create a new string. */
186312 s = lj_mem_newt (L , sizeof (GCstr )+ len + 1 , GCstr );
187313 newwhite (g , s );
188314 s -> gct = ~LJ_TSTR ;
189315 s -> len = len ;
190316 s -> hash = h ;
191317 s -> reserved = 0 ;
318+ s -> strflags = strflags ;
192319 memcpy (strdatawr (s ), str , len );
193320 strdatawr (s )[len ] = '\0' ; /* Zero-terminate string. */
194321 /* Add it to string hash table. */
0 commit comments