Skip to content

Commit 6140404

Browse files
funny-falcon authored and rtsisyk committed
Fix severe slowdown on certain strings
The default "fast" string hash function samples only a few positions in a string; the remaining bytes don't affect the function's result. The function performs well for short strings; however, long strings can yield extremely high collision rates. An adaptive scheme was implemented. Two hash functions are used simultaneously. A bucket is picked based on the output of the fast hash function. If an item is to be inserted in a collision chain longer than a certain threshold, another bucket is picked based on the stronger hash function. Since two hash functions are used simultaneously, insert should consider two buckets. The second bucket is often NOT considered thanks to the bloom filter. The filter is rebuilt during the GC cycle.
1 parent e6e25b4 commit 6140404

File tree

5 files changed

+190
-5
lines changed

5 files changed

+190
-5
lines changed

src/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,10 @@ XCFLAGS=
148148
# everything. Use only if you suspect a problem with LuaJIT itself.
149149
#XCFLAGS+= -DLUA_USE_ASSERT
150150
#
151+
# Switch to harder (and slower) hash function when a collision chain in
152+
# the string hash table exceeds certain length.
153+
XCFLAGS+= -DLUAJIT_SMART_STRINGS=1
154+
#
151155
##############################################################################
152156
# You probably don't need to change anything below this line!
153157
##############################################################################

src/lj_cparse.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,6 +1058,17 @@ static void cp_decl_gccattribute(CPState *cp, CPDecl *decl)
10581058
while (cp->tok != ')') {
10591059
if (cp->tok == CTOK_IDENT) {
10601060
GCstr *attrstr = cp->str;
1061+
#if LUAJIT_SMART_STRINGS
1062+
/*
1063+
* Sadly, several option __name__-s exceed 12 bytes hence they
1064+
* could've been interned using the full hash. Strip "__" to stay
1065+
* within limits.
1066+
*/
1067+
const char *c = strdata(cp->str);
1068+
if (attrstr->len > 12 && c[0]=='_' && c[1]=='_' && c[2] != '_' &&
1069+
c[attrstr->len-2]=='_' && c[attrstr->len-1]=='_')
1070+
attrstr = lj_str_new(cp->L, c+2, attrstr->len-4);
1071+
#endif
10611072
cp_next(cp);
10621073
switch (attrstr->hash) {
10631074
case H_(64a9208e,8ce14319): case H_(8e6331b2,95a282af): /* aligned */

src/lj_gc.c

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,33 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
409409
return p;
410410
}
411411

412+
/* Full sweep of a string chain. */
413+
static GCRef *gc_sweep_str_chain(global_State *g, GCRef *p)
414+
{
415+
/* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */
416+
int ow = otherwhite(g);
417+
GCobj *o;
418+
while ((o = gcref(*p)) != NULL) {
419+
if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */
420+
lua_assert(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED));
421+
makewhite(g, o); /* Value is alive, change to the current white. */
422+
#if LUAJIT_SMART_STRINGS
423+
if (strsmart(&o->str)) {
424+
/* must match lj_str_new */
425+
bloomset(g->strbloom.new[0], o->str.hash >> (sizeof(o->str.hash)*8-6));
426+
bloomset(g->strbloom.new[1], o->str.strflags);
427+
}
428+
#endif
429+
p = &o->gch.nextgc;
430+
} else { /* Otherwise value is dead, free it. */
431+
lua_assert(isdead(g, o) || ow == LJ_GC_SFIXED);
432+
setgcrefr(*p, o->gch.nextgc);
433+
lj_str_free(g, &o->str);
434+
}
435+
}
436+
return p;
437+
}
438+
412439
/* Check whether we can clear a key or a value slot from a table. */
413440
static int gc_mayclear(cTValue *o, int val)
414441
{
@@ -622,12 +649,21 @@ static size_t gc_onestep(lua_State *L)
622649
atomic(g, L);
623650
g->gc.state = GCSsweepstring; /* Start of sweep phase. */
624651
g->gc.sweepstr = 0;
652+
#if LUAJIT_SMART_STRINGS
653+
g->strbloom.new[0] = 0;
654+
g->strbloom.new[1] = 0;
655+
#endif
625656
return 0;
626657
case GCSsweepstring: {
627658
GCSize old = g->gc.total;
628-
gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */
629-
if (g->gc.sweepstr > g->strmask)
659+
gc_sweep_str_chain(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */
660+
if (g->gc.sweepstr > g->strmask) {
630661
g->gc.state = GCSsweep; /* All string hash chains sweeped. */
662+
#if LUAJIT_SMART_STRINGS
663+
g->strbloom.cur[0] = g->strbloom.new[0];
664+
g->strbloom.cur[1] = g->strbloom.new[1];
665+
#endif
666+
}
631667
lua_assert(old >= g->gc.total);
632668
g->gc.estimate -= old - g->gc.total;
633669
return GCSWEEPCOST;

src/lj_obj.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ typedef const TValue cTValue;
291291
typedef struct GCstr {
292292
GCHeader;
293293
uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */
294-
uint8_t unused;
294+
uint8_t strflags; /* If LUAJIT_SMART_STRINGS: 0 for strings interned under the fast hash; values >= 0xc0 (see strsmart()) mark strings interned under the full hash, with the low 6 bits carrying the fast-hash bits that lj_str_new also feeds to the bloom filter. */
295295
MSize hash; /* Hash of string (the full hash instead of the fast one when strsmart() holds). */
296296
MSize len; /* Size of string. */
297297
} GCstr;
@@ -301,6 +301,7 @@ typedef struct GCstr {
301301
#define strdatawr(s) ((char *)((s)+1))
302302
#define strVdata(o) strdata(strV(o))
303303
#define sizestring(s) (sizeof(struct GCstr)+(s)->len+1)
304+
#define strsmart(s) ((s)->strflags >= 0xc0)
304305

305306
/* -- Userdata object ----------------------------------------------------- */
306307

@@ -595,6 +596,12 @@ typedef struct global_State {
595596
GCRef *strhash; /* String hash table (hash chain anchors). */
596597
MSize strmask; /* String hash mask (size of hash table - 1). */
597598
MSize strnum; /* Number of strings in hash table. */
599+
#if LUAJIT_SMART_STRINGS
600+
struct {
601+
BloomFilter cur[2];
602+
BloomFilter new[2];
603+
} strbloom;
604+
#endif
598605
lua_Alloc allocf; /* Memory allocator. */
599606
void *allocd; /* Memory allocator data. */
600607
GCState gc; /* Garbage collector. */

src/lj_str.c

Lines changed: 129 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,40 @@ void lj_str_resize(lua_State *L, MSize newmask)
130130
g->strhash = newhash;
131131
}
132132

133+
#if LUAJIT_SMART_STRINGS
/*
** "Full" string hash, used by lj_str_new as the stronger alternative to
** the fast sampling hash.
**
** v    Pointer to the string bytes.
** len  Length in bytes; must be at least 12 (asserted).
**
** The input is consumed in 8-byte steps while more than 8 bytes remain.
** The final 8 bytes of the string are then mixed in; that last read may
** overlap bytes already consumed by the loop.
*/
static LJ_AINLINE uint32_t
lj_fullhash(const uint8_t *v, MSize len)
{
  MSize w0 = 0, w1 = 0;
  MSize acc0 = 0xcafedead;
  MSize acc1 = 0xdeadbeef;
  MSize sum = len;
  lua_assert(len >= 12);
  while (len > 8) {
    w0 ^= lj_getu32(v);
    w1 ^= lj_getu32(v+4);
    acc0 += w0;
    acc1 += w1;
    w0 = lj_rol(w0, 5) - acc1;
    w1 = lj_rol(w1, 7) - acc0;
    acc0 = lj_rol(acc0, 24) ^ w0;
    acc1 = lj_rol(acc1, 1) ^ w1;
    v += 8;
    len -= 8;
  }
  /* Here 1 <= len <= 8, and v has advanced by at least 8 bytes. */
  w0 ^= lj_getu32(v+len-8);
  w1 ^= lj_getu32(v+len-4);
  acc0 += w1;
  acc0 -= lj_rol(w0, 9);
  acc1 += w0;
  acc1 -= lj_rol(w1, 18);
  /* Final mixing; the statement order is significant. */
  sum -= lj_rol(w0^w1, 7);
  sum += acc0;
  sum += lj_rol(acc1, 13);
  acc1 ^= acc0;
  acc1 -= lj_rol(acc0, 25);
  sum ^= acc1;
  sum -= lj_rol(acc1, 16);
  acc0 ^= sum;
  acc0 -= lj_rol(sum, 4);
  acc1 ^= acc0;
  acc1 -= lj_rol(acc0, 14);
  sum ^= acc1;
  sum -= lj_rol(acc1, 24);
  return sum;
}
#endif
166+
133167
/* Intern a string and return string object. */
134168
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
135169
{
@@ -138,6 +172,10 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
138172
GCobj *o;
139173
MSize len = (MSize)lenx;
140174
MSize a, b, h = len;
175+
uint8_t strflags = 0;
176+
#if LUAJIT_SMART_STRINGS
177+
unsigned collisions = 0;
178+
#endif
141179
if (lenx >= LJ_MAX_STR)
142180
lj_err_msg(L, LJ_ERR_STROV);
143181
g = G(L);
@@ -161,34 +199,123 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
161199
h ^= b; h -= lj_rol(b, 16);
162200
/* Check if the string has already been interned. */
163201
o = gcref(g->strhash[h & g->strmask]);
202+
#if LUAJIT_SMART_STRINGS
203+
/*
204+
** The default "fast" string hash function samples only a few positions
205+
** in a string, the remaining bytes don't affect the function's result.
206+
** The function performs well for short strings; however long strings
207+
** can yield extremely high collision rates.
208+
**
209+
** An adaptive schema was implemented. Two hash functions are used
210+
** simultaneously. A bucket is picked based on the output of the fast
211+
** hash function. If an item is to be inserted in a collision chain
212+
** longer than a certain threshold, another bucket is picked based on
213+
** the stronger hash function. Since two hash functions are used
214+
** simultaneously, insert should consider two buckets. The second bucket
215+
** is often NOT considered thanks to the bloom filter. The filter is
216+
** rebuilt during GC cycle.
217+
**
218+
** Parameters below were tuned on a set of benchmarks. Max_collisions is
219+
** also backed by theory: the expected maximum length of a collision
220+
** chain in a hash table with the fill factor of 1.0 is
221+
** O(log(N)/log(log(N))), assuming uniformly distributed random keys.
222+
** The upper bound for N=65,000 is 10, hence 40 is a clear indication of
223+
** an anomaly.
224+
**/
225+
#define max_collisions 40
226+
#define inc_collision_soft() (collisions++)
227+
/* If different strings yield the same hash sum, grow counter faster. */
228+
#define inc_collision_hard() (collisions+=1+(len>>4), 1)
229+
#else
230+
#define inc_collision_hard() (1)
231+
#define inc_collision_soft()
232+
#endif
164233
if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
165234
while (o != NULL) {
166235
GCstr *sx = gco2str(o);
167-
if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
236+
if (sx->hash == h && sx->len == len && inc_collision_hard() &&
237+
str_fastcmp(str, strdata(sx), len) == 0) {
168238
/* Resurrect if dead. Can only happen with fixstring() (keywords). */
169239
if (isdead(g, o)) flipwhite(o);
170240
return sx; /* Return existing string. */
171241
}
172242
o = gcnext(o);
243+
inc_collision_soft();
173244
}
174245
} else { /* Slow path: end of string is too close to a page boundary. */
175246
while (o != NULL) {
176247
GCstr *sx = gco2str(o);
177-
if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
248+
if (sx->hash == h && sx->len == len && inc_collision_hard() &&
249+
memcmp(str, strdata(sx), len) == 0) {
178250
/* Resurrect if dead. Can only happen with fixstring() (keywords). */
179251
if (isdead(g, o)) flipwhite(o);
180252
return sx; /* Return existing string. */
181253
}
182254
o = gcnext(o);
255+
inc_collision_soft();
183256
}
184257
}
258+
#if LUAJIT_SMART_STRINGS
259+
/* "Fast" hash function consumes all bytes of a string <= 12 bytes. */
260+
if (len > 12) {
261+
/*
262+
** The bloom filter is keyed with the high 12 bits of the fast
263+
** hash sum. The filter is rebuilt during GC cycle. It's beneficial
264+
** to have these bits readily available and avoid hash sum
265+
** recalculation during GC. High 6 bits are included in the "full"
266+
** hash sum, and bits 20-25 are stored in s->strflags.
267+
**/
268+
int search_fullh =
269+
bloomtest(g->strbloom.cur[0], h>>(sizeof(h)*8- 6)) != 0 &&
270+
bloomtest(g->strbloom.cur[1], h>>(sizeof(h)*8-12)) != 0;
271+
if (LJ_UNLIKELY(search_fullh || collisions > max_collisions)) {
272+
MSize fh = lj_fullhash((const uint8_t*)str, len);
273+
#define high6mask ((~(MSize)0)<<(sizeof(MSize)*8-6))
274+
fh = (fh >> 6) | (h & high6mask);
275+
if (search_fullh) {
276+
/* Recheck if the string has already been interned with "harder" hash. */
277+
o = gcref(g->strhash[fh & g->strmask]);
278+
if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
279+
while (o != NULL) {
280+
GCstr *sx = gco2str(o);
281+
if (sx->hash == fh && sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
282+
/* Resurrect if dead. Can only happen with fixstring() (keywords). */
283+
if (isdead(g, o)) flipwhite(o);
284+
return sx; /* Return existing string. */
285+
}
286+
o = gcnext(o);
287+
}
288+
} else { /* Slow path: end of string is too close to a page boundary. */
289+
while (o != NULL) {
290+
GCstr *sx = gco2str(o);
291+
if (sx->hash == fh && sx->len == len && memcmp(str, strdata(sx), len) == 0) {
292+
/* Resurrect if dead. Can only happen with fixstring() (keywords). */
293+
if (isdead(g, o)) flipwhite(o);
294+
return sx; /* Return existing string. */
295+
}
296+
o = gcnext(o);
297+
}
298+
}
299+
}
300+
if (collisions > max_collisions) {
301+
strflags = 0xc0 | ((h>>(sizeof(h)*8-12))&0x3f);
302+
bloomset(g->strbloom.cur[0], h>>(sizeof(h)*8- 6));
303+
bloomset(g->strbloom.cur[1], h>>(sizeof(h)*8-12));
304+
bloomset(g->strbloom.new[0], h>>(sizeof(h)*8- 6));
305+
bloomset(g->strbloom.new[1], h>>(sizeof(h)*8-12));
306+
h = fh;
307+
}
308+
}
309+
}
310+
#endif
185311
/* Nope, create a new string. */
186312
s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
187313
newwhite(g, s);
188314
s->gct = ~LJ_TSTR;
189315
s->len = len;
190316
s->hash = h;
191317
s->reserved = 0;
318+
s->strflags = strflags;
192319
memcpy(strdatawr(s), str, len);
193320
strdatawr(s)[len] = '\0'; /* Zero-terminate string. */
194321
/* Add it to string hash table. */

0 commit comments

Comments
 (0)