Skip to content

Commit bee195a

Browse files
committed
Add implementation of emscripten_memcpy_big based on bulk memory.
These new functions live in `libbulkmemory`, which only gets included if bulk memory is enabled (either directly via `-mbulk-memory` or indirectly via `-pthread`). Benchmark results for benchmark.test_memcpy_1mb: ``` v8: mean: 1.666 v8-bulkmemory: mean: 1.598 v8-standalone-bulkmemory: mean: 1.576 v8-standalone: mean: 3.197 ``` Here we can see that when bulk memory is enabled it's at least as fast as, if not faster than, the JS version. v8-standalone doesn't have emscripten_memcpy_big at all and is much slower, as expected. By adding `-mbulk-memory` the standalone version becomes just as fast as the non-standalone.
1 parent 38eedc6 commit bee195a

14 files changed

+133
-13
lines changed

embuilder.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828

2929
# Minimal subset of targets used by CI systems to build enough to be useful
3030
MINIMAL_TASKS = [
31+
'libbulkmemory',
32+
'libbulkmemory-debug',
3133
'libcompiler_rt',
3234
'libcompiler_rt-wasm-sjlj',
3335
'libc',
@@ -72,6 +74,8 @@
7274
# Additional tasks on top of MINIMAL_TASKS that are necessary for PIC testing on
7375
# CI (which has slightly more tests than other modes that want to use MINIMAL)
7476
MINIMAL_PIC_TASKS = MINIMAL_TASKS + [
77+
'libbulkmemory-mt',
78+
'libbulkmemory-mt-debug',
7579
'libcompiler_rt-mt',
7680
'libc-mt',
7781
'libc-mt-debug',

emcc.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1599,6 +1599,9 @@ def phase_setup(options, state, newargs):
15991599
if '-mbulk-memory' not in newargs:
16001600
newargs += ['-mbulk-memory']
16011601

1602+
if settings.SHARED_MEMORY:
1603+
settings.BULK_MEMORY = 1
1604+
16021605
if 'DISABLE_EXCEPTION_CATCHING' in user_settings and 'EXCEPTION_CATCHING_ALLOWED' in user_settings:
16031606
# If we get here then the user specified both DISABLE_EXCEPTION_CATCHING and EXCEPTION_CATCHING_ALLOWED
16041607
# on the command line. This is no longer valid so report either an error or a warning (for
@@ -2434,6 +2437,8 @@ def phase_linker_setup(options, state, newargs):
24342437
settings.JS_LIBRARIES.append((0, shared.path_from_root('src', 'library_wasm_worker.js')))
24352438

24362439
settings.SUPPORTS_GLOBALTHIS = feature_matrix.caniuse(feature_matrix.Feature.GLOBALTHIS)
2440+
if not settings.BULK_MEMORY:
2441+
settings.BULK_MEMORY = feature_matrix.caniuse(feature_matrix.Feature.BULK_MEMORY)
24372442

24382443
if settings.AUDIO_WORKLET:
24392444
if not settings.SUPPORTS_GLOBALTHIS:
@@ -3565,6 +3570,10 @@ def consume_arg_file():
35653570
settings.DISABLE_EXCEPTION_CATCHING = 1
35663571
settings.DISABLE_EXCEPTION_THROWING = 1
35673572
settings.WASM_EXCEPTIONS = 0
3573+
elif arg == '-mbulk-memory':
3574+
settings.BULK_MEMORY = 1
3575+
elif arg == '-mno-bulk-memory':
3576+
settings.BULK_MEMORY = 0
35683577
elif arg == '-fexceptions':
35693578
# TODO Currently -fexceptions only means Emscripten EH. Switch to wasm
35703579
# exception handling by default when -fexceptions is given when wasm

src/library.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,9 +389,11 @@ mergeInto(LibraryManager.library, {
389389
// variant, so we should never emit emscripten_memcpy_big() in the build.
390390
// In STANDALONE_WASM we avoid the emscripten_memcpy_big dependency to keep
391391
// the wasm file standalone.
392+
// In BULK_MEMORY mode we include native versions of these functions based
393+
// on memory.fill and memory.copy.
392394
// In MAIN_MODULE=1 or EMCC_FORCE_STDLIBS mode all of libc is force included
393395
// so we cannot override parts of it, and therefore cannot use libc_optz.
394-
#if (SHRINK_LEVEL < 2 || LINKABLE || process.env.EMCC_FORCE_STDLIBS) && !STANDALONE_WASM
396+
#if (SHRINK_LEVEL < 2 || LINKABLE || process.env.EMCC_FORCE_STDLIBS) && !STANDALONE_WASM && !BULK_MEMORY
395397

396398
#if MIN_CHROME_VERSION < 45 || MIN_EDGE_VERSION < 14 || MIN_FIREFOX_VERSION < 34 || MIN_IE_VERSION != TARGET_NOT_SUPPORTED || MIN_SAFARI_VERSION < 100101
397399
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray/copyWithin lists browsers that support TypedArray.prototype.copyWithin, but it

src/settings_internal.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,3 +256,5 @@ var POST_JS_FILES = [];
256256

257257
// Set when -pthread / -sPTHREADS is passed
258258
var PTHREADS = false;
259+
260+
var BULK_MEMORY = false;

system/lib/libc/emscripten_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ extern "C" {
3030
void emscripten_memcpy_big(void* __restrict__ dest,
3131
const void* __restrict__ src,
3232
size_t n) EM_IMPORT(emscripten_memcpy_big);
33+
void emscripten_memset_big(void* ptr, char value, size_t n);
3334

3435
void emscripten_notify_memory_growth(size_t memory_index);
3536

system/lib/libc/emscripten_memcpy.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ static void *__memcpy(void *restrict dest, const void *restrict src, size_t n) {
2929
unsigned char *block_aligned_d_end;
3030
unsigned char *d_end;
3131

32-
#ifndef EMSCRIPTEN_STANDALONE_WASM
32+
#if !defined(EMSCRIPTEN_STANDALONE_WASM) || defined(__wasm_bulk_memory__)
3333
if (n >= 512) {
3434
emscripten_memcpy_big(dest, src, n);
3535
return dest;
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#ifdef __wasm64__
2+
#define PTR i64
3+
#else
4+
#define PTR i32
5+
#endif
6+
7+
.globl emscripten_memcpy_big
8+
emscripten_memcpy_big:
9+
.functype emscripten_memcpy_big (PTR, PTR, PTR) -> ()
10+
local.get 0
11+
local.get 1
12+
local.get 2
13+
memory.copy 0, 0
14+
end_function

system/lib/libc/emscripten_memset.c

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,42 @@
1-
// XXX EMSCRIPTEN ASAN: build an uninstrumented version of memset
2-
#if defined(__EMSCRIPTEN__) && defined(__has_feature)
3-
#if __has_feature(address_sanitizer)
4-
#define memset __attribute__((no_sanitize("address"))) emscripten_builtin_memset
5-
#endif
1+
#include "emscripten_internal.h" // for emscripten_memset_big
2+
3+
#if defined(__has_feature) && __has_feature(address_sanitizer)
4+
// build an uninstrumented version of memset
5+
__attribute__((no_sanitize("address"))) void *__musl_memset(void *str, int c, size_t n);
6+
__attribute__((no_sanitize("address"))) void *__memset(void *str, int c, size_t n);
67
#endif
78

89
#ifdef EMSCRIPTEN_OPTIMIZE_FOR_OZ
910

10-
#include <stddef.h>
11-
12-
void *memset(void *str, int c, size_t n) {
11+
void *__memset(void *str, int c, size_t n) {
1312
unsigned char *s = (unsigned char *)str;
1413
#pragma clang loop unroll(disable)
1514
while(n--) *s++ = c;
1615
return str;
1716
}
1817

18+
#elif defined(__wasm_bulk_memory__)
19+
20+
#define memset __musl_memset
21+
#include "musl/src/string/memset.c"
22+
#undef memset
23+
24+
void *__memset(void *str, int c, size_t n) {
25+
if (n >= 512) {
26+
emscripten_memset_big(str, c, n);
27+
return str;
28+
}
29+
return __musl_memset(str, c, n);
30+
}
31+
1932
#else
2033

34+
__attribute__((__weak__)) void *__memset(void *str, int c, size_t n);
35+
#define memset __memset
2136
#include "musl/src/string/memset.c"
37+
#undef memset
2238

2339
#endif
40+
41+
weak_alias(__memset, emscripten_builtin_memset);
42+
weak_alias(__memset, memset);
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#ifdef __wasm64__
2+
#define PTR i64
3+
#else
4+
#define PTR i32
5+
#endif
6+
7+
.globl emscripten_memset_big
8+
emscripten_memset_big:
9+
.functype emscripten_memset_big (PTR, i32, PTR) -> ()
10+
local.get 0
11+
local.get 1
12+
local.get 2
13+
memory.fill 0
14+
end_function

system/lib/standalone/standalone.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ int emscripten_resize_heap(size_t size) {
152152
}
153153

154154
double emscripten_get_now(void) {
155-
return (1000 * clock()) / (double)CLOCKS_PER_SEC;
155+
return (1000ll * clock()) / (double)CLOCKS_PER_SEC;
156156
}
157157

158158
// C++ ABI

test/other/test_memops_bulk_memory.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#include <assert.h>
2+
#include <string.h>
3+
4+
const char *hello = "hello";
5+
const char *world = "world";
6+
7+
int main() {
8+
char buffer[100];
9+
memset(buffer, 'a', 100);
10+
memcpy(buffer, hello, strlen(hello) + 1);
11+
assert(strcmp(buffer, hello) == 0);
12+
return 0;
13+
}

test/test_other.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13412,3 +13412,29 @@ def test_wasi_random_get(self):
1341213412
@requires_node
1341313413
def test_wasi_sched_yield(self):
1341413414
self.run_wasi_test_suite_test('wasi_sched_yield')
13415+
13416+
def test_memops_bulk_memory(self):
13417+
self.emcc_args += ['--profiling-funcs', '-fno-builtin']
13418+
13419+
def run(args, expect_bulk_mem):
13420+
self.do_runf(test_file('other/test_memops_bulk_memory.c'), emcc_args=args)
13421+
funcs = self.parse_wasm('test_memops_bulk_memory.wasm')[2]
13422+
js = read_file('test_memops_bulk_memory.js')
13423+
if expect_bulk_mem:
13424+
self.assertNotContained('_emscripten_memcpy_big', js)
13425+
self.assertIn('$emscripten_memcpy_big', funcs)
13426+
else:
13427+
self.assertContained('_emscripten_memcpy_big', js)
13428+
self.assertNotIn('$emscripten_memcpy_big', funcs)
13429+
13430+
# By default we expect to find _emscripten_memcpy_big in the generated JS and not in the
13431+
# native code.
13432+
run([], expect_bulk_mem=False)
13433+
13434+
# With bulk memory enabled we expect *not* to find it.
13435+
run(['-mbulk-memory'], expect_bulk_mem=True)
13436+
13437+
run(['-mbulk-memory', '-mno-bulk-memory'], expect_bulk_mem=False)
13438+
13439+
# -pthread implicitly enables bulk memory too.
13440+
run(['-pthread'], expect_bulk_mem=True)

tools/settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
'DEFAULT_TO_CXX',
7272
'WASM_OBJECT_FILES',
7373
'WASM_WORKERS',
74+
'BULK_MEMORY',
7475

7576
# Internal settings used during compilation
7677
'EXCEPTION_CATCHING_ALLOWED',

tools/system_libs.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1274,6 +1274,20 @@ def can_use(self):
12741274
not settings.LINKABLE and not os.environ.get('EMCC_FORCE_STDLIBS')
12751275

12761276

1277+
class libbulkmemory(MuslInternalLibrary,
1278+
DebugLibrary,
1279+
AsanInstrumentedLibrary,
1280+
MTLibrary):
1281+
name = 'libbulkmemory'
1282+
src_dir = 'system/lib/libc'
1283+
src_files = ['emscripten_memcpy.c', 'emscripten_memset.c',
1284+
'emscripten_memcpy_big.S', 'emscripten_memset_big.S']
1285+
cflags = ['-mbulk-memory']
1286+
1287+
def can_use(self):
1288+
return super(libbulkmemory, self).can_use() and settings.BULK_MEMORY
1289+
1290+
12771291
class libprintf_long_double(libc):
12781292
name = 'libprintf_long_double'
12791293
cflags = ['-DEMSCRIPTEN_PRINTF_LONG_DOUBLE']
@@ -1945,7 +1959,7 @@ def get_files(self):
19451959
'__main_void.c'])
19461960
files += files_in_path(
19471961
path='system/lib/libc',
1948-
filenames=['emscripten_memcpy.c'])
1962+
filenames=['emscripten_memcpy.c', 'emscripten_memset.c'])
19491963
# It is more efficient to use JS methods for time, normally.
19501964
files += files_in_path(
19511965
path='system/lib/libc/musl/src/time',
@@ -2154,7 +2168,8 @@ def add_sanitizer_libs():
21542168
if settings.SHRINK_LEVEL >= 2 and not settings.LINKABLE and \
21552169
not os.environ.get('EMCC_FORCE_STDLIBS'):
21562170
add_library('libc_optz')
2157-
2171+
if settings.BULK_MEMORY:
2172+
add_library('libbulkmemory')
21582173
if settings.STANDALONE_WASM:
21592174
add_library('libstandalonewasm')
21602175
if settings.ALLOW_UNIMPLEMENTED_SYSCALLS:

0 commit comments

Comments
 (0)