Skip to content

Commit ff34b48

Browse files
author
Mike Pall
committed
Redesign and harden string interning.
Up to 40% faster on hash-intensive benchmarks. With some ideas from Sokolov Yura.
1 parent a44f53a commit ff34b48

22 files changed

+394
-202
lines changed

src/Makefile

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -132,7 +132,6 @@ XCFLAGS=
132132
#
133133
# This define is required to run LuaJIT under Valgrind. The Valgrind
134134
# header files must be installed. You should enable debug information, too.
135-
# Use --suppressions=lj.supp to avoid some false positives.
136135
#XCFLAGS+= -DLUAJIT_USE_VALGRIND
137136
#
138137
# This is the client for the GDB JIT API. GDB 7.0 or higher is required

src/lj.supp

Lines changed: 0 additions & 41 deletions
This file was deleted.

src/lj_arch.h

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -637,14 +637,31 @@ extern void *LJ_WIN_LOADLIBA(const char *path);
637637

638638
/* Don't make any changes here. Instead build with:
639639
** make "XCFLAGS=-DLUAJIT_SECURITY_flag=value"
640+
**
641+
** Important note to distro maintainers: DO NOT change the defaults for a
642+
** regular distro build -- neither upwards, nor downwards!
643+
** These build-time configurable security flags are intended for embedders
644+
** who may have specific needs wrt. security vs. performance.
640645
*/
641646

642647
/* Security defaults. */
643648
#ifndef LUAJIT_SECURITY_PRNG
649+
/* PRNG init: 0 = fixed/insecure, 1 = secure from OS. */
644650
#define LUAJIT_SECURITY_PRNG 1
645651
#endif
646652

653+
#ifndef LUAJIT_SECURITY_STRHASH
654+
/* String hash: 0 = sparse only, 1 = sparse + dense. */
655+
#define LUAJIT_SECURITY_STRHASH 1
656+
#endif
657+
658+
#ifndef LUAJIT_SECURITY_STRID
659+
/* String IDs: 0 = linear, 1 = reseed < 255, 2 = reseed < 15, 3 = random. */
660+
#define LUAJIT_SECURITY_STRID 1
661+
#endif
662+
647663
#ifndef LUAJIT_SECURITY_MCODE
664+
/* Machine code page protection: 0 = insecure RWX, 1 = secure RW^X. */
648665
#define LUAJIT_SECURITY_MCODE 1
649666
#endif
650667

src/lj_asm.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1029,7 +1029,7 @@ static uint32_t ir_khash(ASMState *as, IRIns *ir)
10291029
uint32_t lo, hi;
10301030
UNUSED(as);
10311031
if (irt_isstr(ir->t)) {
1032-
return ir_kstr(ir)->hash;
1032+
return ir_kstr(ir)->sid;
10331033
} else if (irt_isnum(ir->t)) {
10341034
lo = ir_knum(ir)->u32.lo;
10351035
hi = ir_knum(ir)->u32.hi << 1;

src/lj_asm_arm.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -825,10 +825,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
825825
} else {
826826
emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp);
827827
emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp);
828-
if (irt_isstr(kt)) { /* Fetch of str->hash is cheaper than ra_allock. */
828+
if (irt_isstr(kt)) { /* Fetch of str->sid is cheaper than ra_allock. */
829829
emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP);
830830
emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
831-
emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash));
831+
emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, sid));
832832
emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
833833
} else if (irref_isk(refkey)) {
834834
emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash,

src/lj_asm_arm64.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -847,9 +847,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
847847
emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
848848
emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
849849
} else if (irt_isstr(kt)) {
850-
/* Fetch of str->hash is cheaper than ra_allock. */
850+
/* Fetch of str->sid is cheaper than ra_allock. */
851851
emit_dnm(as, A64I_ANDw, dest, dest, tmp);
852-
emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash));
852+
emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid));
853853
emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
854854
} else { /* Must match with hash*() in lj_tab.c. */
855855
emit_dnm(as, A64I_ANDw, dest, dest, tmp);

src/lj_asm_mips.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1041,7 +1041,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
10411041
if (isk) {
10421042
/* Nothing to do. */
10431043
} else if (irt_isstr(kt)) {
1044-
emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash));
1044+
emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid));
10451045
} else { /* Must match with hash*() in lj_tab.c. */
10461046
emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2);
10471047
emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31);

src/lj_asm_ppc.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -721,7 +721,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
721721
if (isk) {
722722
/* Nothing to do. */
723723
} else if (irt_isstr(kt)) {
724-
emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash));
724+
emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, sid));
725725
} else { /* Must match with hash*() in lj_tab.c. */
726726
emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1);
727727
emit_rotlwi(as, tmp2, tmp2, HASH_ROT3);

src/lj_asm_x86.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1228,7 +1228,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
12281228
emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash);
12291229
emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
12301230
} else if (irt_isstr(kt)) {
1231-
emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash));
1231+
emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, sid));
12321232
emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
12331233
} else { /* Must match with hashrot() in lj_tab.c. */
12341234
emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask));

src/lj_gc.c

Lines changed: 32 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -417,6 +417,32 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
417417
return p;
418418
}
419419

420+
/* Sweep one string interning table chain. Preserves hashalg bit. */
421+
static void gc_sweepstr(global_State *g, GCRef *chain)
422+
{
423+
/* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */
424+
int ow = otherwhite(g);
425+
uintptr_t u = gcrefu(*chain);  /* Raw chain head word, low bit included. */
426+
GCRef q;  /* Local copy of the chain head with the low bit cleared. */
427+
GCRef *p = &q;  /* Link slot currently being examined; starts at the head. */
428+
GCobj *o;
429+
setgcrefp(q, (u & ~(uintptr_t)1));  /* Strip bit 0 to get the first object. */
430+
while ((o = gcref(*p)) != NULL) {  /* Walk the chain until a NULL link. */
431+
if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */
432+
lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED),
433+
"sweep of undead string");
434+
makewhite(g, o); /* String is alive, change to the current white. */
435+
p = &o->gch.nextgc;  /* Keep it and advance to its next-link slot. */
436+
} else { /* Otherwise string is dead, free it. */
437+
lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED,
438+
"sweep of unlive string");
439+
setgcrefr(*p, o->gch.nextgc);  /* Unlink o; p stays on the same slot. */
440+
lj_str_free(g, gco2str(o));
441+
}
442+
}
443+
setgcrefp(*chain, (gcrefu(q) | (u & 1)));  /* Store new head, restoring bit 0. */
444+
}
445+
420446
/* Check whether we can clear a key or a value slot from a table. */
421447
static int gc_mayclear(cTValue *o, int val)
422448
{
@@ -571,9 +597,9 @@ void lj_gc_freeall(global_State *g)
571597
/* Free everything, except super-fixed objects (the main thread). */
572598
g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED;
573599
gc_fullsweep(g, &g->gc.root);
574-
strmask = g->strmask;
600+
strmask = g->str.mask;
575601
for (i = 0; i <= strmask; i++) /* Free all string hash chains. */
576-
gc_fullsweep(g, &g->strhash[i]);
602+
gc_sweepstr(g, &g->str.tab[i]);
577603
}
578604

579605
/* -- Collector ----------------------------------------------------------- */
@@ -636,8 +662,8 @@ static size_t gc_onestep(lua_State *L)
636662
return 0;
637663
case GCSsweepstring: {
638664
GCSize old = g->gc.total;
639-
gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */
640-
if (g->gc.sweepstr > g->strmask)
665+
gc_sweepstr(g, &g->str.tab[g->gc.sweepstr++]); /* Sweep one chain. */
666+
if (g->gc.sweepstr > g->str.mask)
641667
g->gc.state = GCSsweep; /* All string hash chains sweeped. */
642668
lj_assertG(old >= g->gc.total, "sweep increased memory");
643669
g->gc.estimate -= old - g->gc.total;
@@ -649,8 +675,8 @@ static size_t gc_onestep(lua_State *L)
649675
lj_assertG(old >= g->gc.total, "sweep increased memory");
650676
g->gc.estimate -= old - g->gc.total;
651677
if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) {
652-
if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
653-
lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
678+
if (g->str.num <= (g->str.mask >> 2) && g->str.mask > LJ_MIN_STRTAB*2-1)
679+
lj_str_resize(L, g->str.mask >> 1); /* Shrink string table. */
654680
if (gcref(g->gc.mmudata)) { /* Need any finalizations? */
655681
g->gc.state = GCSfinalize;
656682
#if LJ_HASFFI

0 commit comments

Comments
 (0)