Skip to content

Commit 6f3cfe3

Browse files
authored
Add implementation of emscripten_memcpy_big based on bulk memory. (#19128)
These new functions live in `libbulkmemory`, which only gets included if bulk memory is enabled (either directly via `-mbulk-memory` or indirectly via `-pthread`). Benchmark results for benchmark.test_memcpy_1mb: ``` v8: mean: 1.666 v8-bulkmemory: mean: 1.598 v8-standalone-bulkmemory: mean: 1.576 v8-standalone: mean: 3.197 ``` Here we can see that when bulk memory is enabled it's at least as fast as, if not faster than, the JS version. v8-standalone doesn't have emscripten_memcpy_big at all and is much slower, as expected. By adding `-mbulk-memory` the standalone version becomes just as fast as the non-standalone.
1 parent 38eedc6 commit 6f3cfe3

16 files changed

+133
-16
lines changed

embuilder.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
# Minimal subset of targets used by CI systems to build enough to be useful
3030
MINIMAL_TASKS = [
31+
'libbulkmemory',
3132
'libcompiler_rt',
3233
'libcompiler_rt-wasm-sjlj',
3334
'libc',

emcc.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1599,6 +1599,9 @@ def phase_setup(options, state, newargs):
15991599
if '-mbulk-memory' not in newargs:
16001600
newargs += ['-mbulk-memory']
16011601

1602+
if settings.SHARED_MEMORY:
1603+
settings.BULK_MEMORY = 1
1604+
16021605
if 'DISABLE_EXCEPTION_CATCHING' in user_settings and 'EXCEPTION_CATCHING_ALLOWED' in user_settings:
16031606
# If we get here then the user specified both DISABLE_EXCEPTION_CATCHING and EXCEPTION_CATCHING_ALLOWED
16041607
# on the command line. This is no longer valid so report either an error or a warning (for
@@ -2434,6 +2437,8 @@ def phase_linker_setup(options, state, newargs):
24342437
settings.JS_LIBRARIES.append((0, shared.path_from_root('src', 'library_wasm_worker.js')))
24352438

24362439
settings.SUPPORTS_GLOBALTHIS = feature_matrix.caniuse(feature_matrix.Feature.GLOBALTHIS)
2440+
if not settings.BULK_MEMORY:
2441+
settings.BULK_MEMORY = feature_matrix.caniuse(feature_matrix.Feature.BULK_MEMORY)
24372442

24382443
if settings.AUDIO_WORKLET:
24392444
if not settings.SUPPORTS_GLOBALTHIS:
@@ -3565,6 +3570,10 @@ def consume_arg_file():
35653570
settings.DISABLE_EXCEPTION_CATCHING = 1
35663571
settings.DISABLE_EXCEPTION_THROWING = 1
35673572
settings.WASM_EXCEPTIONS = 0
3573+
elif arg == '-mbulk-memory':
3574+
settings.BULK_MEMORY = 1
3575+
elif arg == '-mno-bulk-memory':
3576+
settings.BULK_MEMORY = 0
35683577
elif arg == '-fexceptions':
35693578
# TODO Currently -fexceptions only means Emscripten EH. Switch to wasm
35703579
# exception handling by default when -fexceptions is given when wasm

src/library.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,9 +389,11 @@ mergeInto(LibraryManager.library, {
389389
// variant, so we should never emit emscripten_memcpy_big() in the build.
390390
// In STANDALONE_WASM we avoid the emscripten_memcpy_big dependency to keep
391391
// the wasm file standalone.
392+
// In BULK_MEMORY mode we include native versions of these functions based
393+
// on memory.fill and memory.copy.
392394
// In MAIN_MODULE=1 or EMCC_FORCE_STDLIBS mode all of libc is force included
393395
// so we cannot override parts of it, and therefore cannot use libc_optz.
394-
#if (SHRINK_LEVEL < 2 || LINKABLE || process.env.EMCC_FORCE_STDLIBS) && !STANDALONE_WASM
396+
#if (SHRINK_LEVEL < 2 || LINKABLE || process.env.EMCC_FORCE_STDLIBS) && !STANDALONE_WASM && !BULK_MEMORY
395397

396398
#if MIN_CHROME_VERSION < 45 || MIN_EDGE_VERSION < 14 || MIN_FIREFOX_VERSION < 34 || MIN_IE_VERSION != TARGET_NOT_SUPPORTED || MIN_SAFARI_VERSION < 100101
397399
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray/copyWithin lists browsers that support TypedArray.prototype.copyWithin, but it

src/settings_internal.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,3 +256,5 @@ var POST_JS_FILES = [];
256256

257257
// Set when -pthread / -sPTHREADS is passed
258258
var PTHREADS = false;
259+
260+
var BULK_MEMORY = false;

system/lib/libc/emscripten_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ extern "C" {
3030
void emscripten_memcpy_big(void* __restrict__ dest,
3131
const void* __restrict__ src,
3232
size_t n) EM_IMPORT(emscripten_memcpy_big);
33+
void emscripten_memset_big(void* ptr, char value, size_t n);
3334

3435
void emscripten_notify_memory_growth(size_t memory_index);
3536

system/lib/libc/emscripten_memcpy.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ static void *__memcpy(void *restrict dest, const void *restrict src, size_t n) {
2929
unsigned char *block_aligned_d_end;
3030
unsigned char *d_end;
3131

32-
#ifndef EMSCRIPTEN_STANDALONE_WASM
32+
#if !defined(EMSCRIPTEN_STANDALONE_WASM) || defined(__wasm_bulk_memory__)
3333
if (n >= 512) {
3434
emscripten_memcpy_big(dest, src, n);
3535
return dest;
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#ifdef __wasm64__
2+
#define PTR i64
3+
#else
4+
#define PTR i32
5+
#endif
6+
7+
.globl emscripten_memcpy_big
8+
emscripten_memcpy_big:
9+
.functype emscripten_memcpy_big (PTR, PTR, PTR) -> ()
10+
local.get 0
11+
local.get 1
12+
local.get 2
13+
memory.copy 0, 0
14+
end_function

system/lib/libc/emscripten_memset.c

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,44 @@
1-
// XXX EMSCRIPTEN ASAN: build an uninstrumented version of memset
2-
#if defined(__EMSCRIPTEN__) && defined(__has_feature)
3-
#if __has_feature(address_sanitizer)
4-
#define memset __attribute__((no_sanitize("address"))) emscripten_builtin_memset
5-
#endif
1+
#include "emscripten_internal.h" // for emscripten_memset_big
2+
3+
#if defined(__has_feature) && __has_feature(address_sanitizer)
4+
// build an uninstrumented version of memset
5+
__attribute__((no_sanitize("address"))) void *__musl_memset(void *str, int c, size_t n);
6+
__attribute__((no_sanitize("address"))) void *__memset(void *str, int c, size_t n);
67
#endif
78

8-
#ifdef EMSCRIPTEN_OPTIMIZE_FOR_OZ
9+
__attribute__((__weak__)) void *__musl_memset(void *str, int c, size_t n);
10+
__attribute__((__weak__)) void *__memset(void *str, int c, size_t n);
911

10-
#include <stddef.h>
12+
#ifdef EMSCRIPTEN_OPTIMIZE_FOR_OZ
1113

12-
void *memset(void *str, int c, size_t n) {
14+
void *__memset(void *str, int c, size_t n) {
1315
unsigned char *s = (unsigned char *)str;
1416
#pragma clang loop unroll(disable)
1517
while(n--) *s++ = c;
1618
return str;
1719
}
1820

21+
#elif defined(__wasm_bulk_memory__)
22+
23+
#define memset __musl_memset
24+
#include "musl/src/string/memset.c"
25+
#undef memset
26+
27+
void *__memset(void *str, int c, size_t n) {
28+
if (n >= 512) {
29+
emscripten_memset_big(str, c, n);
30+
return str;
31+
}
32+
return __musl_memset(str, c, n);
33+
}
34+
1935
#else
2036

37+
#define memset __memset
2138
#include "musl/src/string/memset.c"
39+
#undef memset
2240

2341
#endif
42+
43+
weak_alias(__memset, emscripten_builtin_memset);
44+
weak_alias(__memset, memset);
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#ifdef __wasm64__
2+
#define PTR i64
3+
#else
4+
#define PTR i32
5+
#endif
6+
7+
.globl emscripten_memset_big
8+
emscripten_memset_big:
9+
.functype emscripten_memset_big (PTR, i32, PTR) -> ()
10+
local.get 0
11+
local.get 1
12+
local.get 2
13+
memory.fill 0
14+
end_function

system/lib/standalone/standalone.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ int emscripten_resize_heap(size_t size) {
152152
}
153153

154154
double emscripten_get_now(void) {
155-
return (1000 * clock()) / (double)CLOCKS_PER_SEC;
155+
return (1000ll * clock()) / (double)CLOCKS_PER_SEC;
156156
}
157157

158158
// C++ ABI

test/other/metadce/test_metadce_hello_O0.funcs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ $__lock
99
$__lockfile
1010
$__lshrti3
1111
$__memcpy
12+
$__memset
1213
$__ofl_lock
1314
$__ofl_unlock
1415
$__original_main
@@ -41,7 +42,6 @@ $isdigit
4142
$legalstub$dynCall_jiji
4243
$main
4344
$memchr
44-
$memset
4545
$out
4646
$pad
4747
$pop_arg

test/other/metadce/test_metadce_minimal_pthreads.funcs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
$__emscripten_stdout_seek
22
$__errno_location
33
$__memcpy
4+
$__memset
45
$__pthread_mutex_lock
56
$__pthread_mutex_trylock
67
$__pthread_mutex_unlock
@@ -63,7 +64,6 @@ $get_tasks_for_thread
6364
$init_file_lock
6465
$init_mparams
6566
$main
66-
$memset
6767
$nodtor
6868
$pthread_attr_destroy
6969
$receive_notification

test/other/test_memops_bulk_memory.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#include <assert.h>
2+
#include <string.h>
3+
4+
const char *hello = "hello";
5+
const char *world = "world";
6+
7+
int main() {
8+
char buffer[100];
9+
memset(buffer, 'a', 100);
10+
memcpy(buffer, hello, strlen(hello) + 1);
11+
assert(strcmp(buffer, hello) == 0);
12+
return 0;
13+
}

test/test_other.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12219,7 +12219,7 @@ def test_standard_library_mapping(self):
1221912219
# Test that the `-l` flags on the command line get mapped to the correct library variants
1222012220
self.run_process([EMBUILDER, 'build', 'libc-mt-debug', 'libcompiler_rt-mt', 'libdlmalloc-mt'])
1222112221

12222-
libs = ['-lc', '-lcompiler_rt', '-lmalloc']
12222+
libs = ['-lc', '-lbulkmemory', '-lcompiler_rt', '-lmalloc']
1222312223
err = self.run_process([EMCC, test_file('hello_world.c'), '-pthread', '-nodefaultlibs', '-v'] + libs, stderr=PIPE).stderr
1222412224

1222512225
# Check that the linker was run with `-mt` variants because `-pthread` was passed.
@@ -13412,3 +13412,30 @@ def test_wasi_random_get(self):
1341213412
@requires_node
1341313413
def test_wasi_sched_yield(self):
1341413414
self.run_wasi_test_suite_test('wasi_sched_yield')
13415+
13416+
def test_memops_bulk_memory(self):
13417+
self.emcc_args += ['--profiling-funcs', '-fno-builtin']
13418+
13419+
def run(args, expect_bulk_mem):
13420+
self.do_runf(test_file('other/test_memops_bulk_memory.c'), emcc_args=args)
13421+
funcs = self.parse_wasm('test_memops_bulk_memory.wasm')[2]
13422+
js = read_file('test_memops_bulk_memory.js')
13423+
if expect_bulk_mem:
13424+
self.assertNotContained('_emscripten_memcpy_big', js)
13425+
self.assertIn('$emscripten_memcpy_big', funcs)
13426+
else:
13427+
self.assertContained('_emscripten_memcpy_big', js)
13428+
self.assertNotIn('$emscripten_memcpy_big', funcs)
13429+
13430+
# By default we expect to find _emscripten_memcpy_big in the generated JS and not in the
13431+
# native code.
13432+
run([], expect_bulk_mem=False)
13433+
13434+
# With bulk memory enabled we expect *not* to find it.
13435+
run(['-mbulk-memory'], expect_bulk_mem=True)
13436+
13437+
run(['-mbulk-memory', '-mno-bulk-memory'], expect_bulk_mem=False)
13438+
13439+
# -pthread implicitly enables bulk memory too.
13440+
self.setup_node_pthreads()
13441+
run(['-pthread'], expect_bulk_mem=True)

tools/settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
'DEFAULT_TO_CXX',
7272
'WASM_OBJECT_FILES',
7373
'WASM_WORKERS',
74+
'BULK_MEMORY',
7475

7576
# Internal settings used during compilation
7677
'EXCEPTION_CATCHING_ALLOWED',

tools/system_libs.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1274,6 +1274,17 @@ def can_use(self):
12741274
not settings.LINKABLE and not os.environ.get('EMCC_FORCE_STDLIBS')
12751275

12761276

1277+
class libbulkmemory(MuslInternalLibrary, AsanInstrumentedLibrary):
1278+
name = 'libbulkmemory'
1279+
src_dir = 'system/lib/libc'
1280+
src_files = ['emscripten_memcpy.c', 'emscripten_memset.c',
1281+
'emscripten_memcpy_big.S', 'emscripten_memset_big.S']
1282+
cflags = ['-mbulk-memory']
1283+
1284+
def can_use(self):
1285+
return super(libbulkmemory, self).can_use() and settings.BULK_MEMORY
1286+
1287+
12771288
class libprintf_long_double(libc):
12781289
name = 'libprintf_long_double'
12791290
cflags = ['-DEMSCRIPTEN_PRINTF_LONG_DOUBLE']
@@ -1945,7 +1956,7 @@ def get_files(self):
19451956
'__main_void.c'])
19461957
files += files_in_path(
19471958
path='system/lib/libc',
1948-
filenames=['emscripten_memcpy.c'])
1959+
filenames=['emscripten_memcpy.c', 'emscripten_memset.c'])
19491960
# It is more efficient to use JS methods for time, normally.
19501961
files += files_in_path(
19511962
path='system/lib/libc/musl/src/time',
@@ -2154,7 +2165,8 @@ def add_sanitizer_libs():
21542165
if settings.SHRINK_LEVEL >= 2 and not settings.LINKABLE and \
21552166
not os.environ.get('EMCC_FORCE_STDLIBS'):
21562167
add_library('libc_optz')
2157-
2168+
if settings.BULK_MEMORY:
2169+
add_library('libbulkmemory')
21582170
if settings.STANDALONE_WASM:
21592171
add_library('libstandalonewasm')
21602172
if settings.ALLOW_UNIMPLEMENTED_SYSCALLS:

0 commit comments

Comments
 (0)