From 4602c1bdfd84aebab526717d74c6b49dae5f6493 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Wed, 17 Apr 2024 16:17:42 -0700 Subject: [PATCH 1/5] Avoid creating JS symbols for symbols only used in dynamic linking Symbols that are exported using EMSCRIPTEN_KEEPALIVE are supposed to be exported to the outside world (i.e. on the Module object) and also be available to call from JS within the module. Symbols exported for the purpose of dynamic linking do not need to be exported on the Module and are added (at runtime) to `wasmImports` which acts as a kind of global symbol table for the program. In the case of `-sMAIN_MODULE=1` we export *all* symbols from all libraries, and prior to this change it was not possible to distinguish between all the exports generated because of `--export-dynamic`, and the exports generated due to `EMSCRIPTEN_KEEPALIVE`. This change allows us to differentiate by running `wasm-ld` twice: once without `--export-dynamic` (to get the smaller list of `EMSCRIPTEN_KEEPALIVE` exports) and then once with `--export-dynamic` to produce the actual wasm that we output. This takes the list of exports that we turn into JS globals from 7993 to 28, massively reducing the overhead of `-sMAIN_MODULE=1`.
--- tools/emscripten.py | 46 +++++++++++++++++++++++++-------------------- tools/link.py | 30 +++++++++++++++++++++++------ 2 files changed, 50 insertions(+), 26 deletions(-) diff --git a/tools/emscripten.py b/tools/emscripten.py index f73e2e1675b4c..f8d5fb8496879 100644 --- a/tools/emscripten.py +++ b/tools/emscripten.py @@ -290,22 +290,22 @@ def trim_asm_const_body(body): return body -def create_global_exports(metadata): - global_exports = [] - for k, v in metadata.global_exports.items(): +def create_global_exports(global_exports): + lines = [] + for k, v in global_exports.items(): v = int(v) if settings.RELOCATABLE: v += settings.GLOBAL_BASE mangled = asmjs_mangle(k) if settings.MINIMAL_RUNTIME: - global_exports.append("var %s = %s;" % (mangled, v)) + lines.append("var %s = %s;" % (mangled, v)) else: - global_exports.append("var %s = Module['%s'] = %s;" % (mangled, mangled, v)) + lines.append("var %s = Module['%s'] = %s;" % (mangled, mangled, v)) - return '\n'.join(global_exports) + return '\n'.join(lines) -def emscript(in_wasm, out_wasm, outfile_js, js_syms, finalize=True): +def emscript(in_wasm, out_wasm, outfile_js, js_syms, finalize=True, base_metadata=None): # Overview: # * Run wasm-emscripten-finalize to extract metadata and modify the binary # to use emscripten's wasm<->JS ABI @@ -329,12 +329,6 @@ def emscript(in_wasm, out_wasm, outfile_js, js_syms, finalize=True): if settings.RELOCATABLE and settings.MEMORY64 == 2: metadata.imports += ['__memory_base32'] - if settings.ASYNCIFY == 1: - metadata.function_exports['asyncify_start_unwind'] = webassembly.FuncType([webassembly.Type.I32], []) - metadata.function_exports['asyncify_stop_unwind'] = webassembly.FuncType([], []) - metadata.function_exports['asyncify_start_rewind'] = webassembly.FuncType([webassembly.Type.I32], []) - metadata.function_exports['asyncify_stop_rewind'] = webassembly.FuncType([], []) - # If the binary has already been finalized the settings have already been # updated and we can skip 
updating them. if finalize: @@ -444,18 +438,31 @@ def emscript(in_wasm, out_wasm, outfile_js, js_syms, finalize=True): '// === Body ===\n', '// === Body ===\n\n' + extra_code + '\n') + if base_metadata: + function_exports = base_metadata.function_exports + global_exports = base_metadata.global_exports + else: + function_exports = metadata.function_exports + global_exports = metadata.global_exports + + if settings.ASYNCIFY == 1: + function_exports['asyncify_start_unwind'] = webassembly.FuncType([webassembly.Type.I32], []) + function_exports['asyncify_stop_unwind'] = webassembly.FuncType([], []) + function_exports['asyncify_start_rewind'] = webassembly.FuncType([webassembly.Type.I32], []) + function_exports['asyncify_stop_rewind'] = webassembly.FuncType([], []) + with open(outfile_js, 'w', encoding='utf-8') as out: out.write(pre) pre = None - receiving = create_receiving(metadata.function_exports) + receiving = create_receiving(function_exports) if settings.MINIMAL_RUNTIME: if settings.DECLARE_ASM_MODULE_EXPORTS: - post = compute_minimal_runtime_initializer_and_exports(post, metadata.function_exports, receiving) + post = compute_minimal_runtime_initializer_and_exports(post, function_exports, receiving) receiving = '' - module = create_module(receiving, metadata, forwarded_json['librarySymbols']) + module = create_module(receiving, metadata, global_exports, forwarded_json['librarySymbols']) metadata.library_definitions = forwarded_json['libraryDefinitions'] @@ -638,8 +645,7 @@ def create_tsd(metadata, embind_tsd): out += create_tsd_exported_runtime_methods(metadata) # Manually generate defintions for any Wasm function exports. 
out += 'interface WasmModule {\n' - function_exports = metadata.function_exports - for name, types in function_exports.items(): + for name, types in metadata.function_exports.items(): mangled = asmjs_mangle(name) should_export = settings.EXPORT_KEEPALIVE and mangled in settings.EXPORTED_FUNCTIONS if not should_export: @@ -950,8 +956,8 @@ def create_receiving(function_exports): return '\n'.join(receiving) + '\n' -def create_module(receiving, metadata, library_symbols): - receiving += create_global_exports(metadata) +def create_module(receiving, metadata, global_exports, library_symbols): + receiving += create_global_exports(global_exports) module = [] sending = create_sending(metadata, library_symbols) diff --git a/tools/link.py b/tools/link.py index f227047b094b1..dd13d460600fe 100644 --- a/tools/link.py +++ b/tools/link.py @@ -32,6 +32,7 @@ from . import system_libs from . import utils from . import webassembly +from . import extract_metadata from .utils import read_file, read_binary, write_file, delete_file from .utils import removeprefix, exit_with_error from .shared import in_temp, safe_copy, do_replace, OFormat @@ -1847,11 +1848,28 @@ def phase_link(linker_arguments, wasm_target, js_syms): settings.REQUIRED_EXPORTS = dedup_list(settings.REQUIRED_EXPORTS) settings.EXPORT_IF_DEFINED = dedup_list(settings.EXPORT_IF_DEFINED) + rtn = None + if settings.LINKABLE: + # In LINKABLE mode we pass `--export-dynamic` along with `--whole-archive`. This results + # over 7000 exports, which cannot be distingished from the few symbols we explicitly + # export via EMSCRIPTEN_KEEPALIVE or EXPORTED_FUNCTIONS. + # In order to be able limit the number of symbols we export on the `Module` object we + # run the linker twice in this mode. + # 1. Without `--export-dynamic` to get the base exports + # 2. 
With `--export-dynamic` to get the actual linkable Wasm binary + # TODO(sbc): Remove this double execution of wasm-ld if we ever find a way to + # distingiush EMSCRIPTEN_KEEPALIVE exports from `--export-dynamic` exports. + settings.LINKABLE = False + building.link_lld(linker_arguments, wasm_target, external_symbols=js_syms) + settings.LINKABLE = True + rtn = extract_metadata.extract_metadata(wasm_target) + building.link_lld(linker_arguments, wasm_target, external_symbols=js_syms) + return rtn @ToolchainProfiler.profile_block('post link') -def phase_post_link(options, state, in_wasm, wasm_target, target, js_syms): +def phase_post_link(options, state, in_wasm, wasm_target, target, js_syms, base_metadata=None): global final_js target_basename = unsuffixed_basename(target) @@ -1868,7 +1886,7 @@ def phase_post_link(options, state, in_wasm, wasm_target, target, js_syms): settings.TARGET_JS_NAME = os.path.basename(state.js_target) - metadata = phase_emscript(in_wasm, wasm_target, js_syms) + metadata = phase_emscript(in_wasm, wasm_target, js_syms, base_metadata) if settings.EMBIND_AOT: phase_embind_aot(wasm_target, js_syms) @@ -1887,7 +1905,7 @@ def phase_post_link(options, state, in_wasm, wasm_target, target, js_syms): @ToolchainProfiler.profile_block('emscript') -def phase_emscript(in_wasm, wasm_target, js_syms): +def phase_emscript(in_wasm, wasm_target, js_syms, base_metadata): # Emscripten logger.debug('emscript') @@ -1898,7 +1916,7 @@ def phase_emscript(in_wasm, wasm_target, js_syms): if shared.SKIP_SUBPROCS: return - metadata = emscripten.emscript(in_wasm, wasm_target, final_js, js_syms) + metadata = emscripten.emscript(in_wasm, wasm_target, final_js, js_syms, base_metadata=base_metadata) save_intermediate('original') return metadata @@ -3085,7 +3103,7 @@ def add_js_deps(sym): settings.ASYNCIFY_IMPORTS_EXCEPT_JS_LIBS = settings.ASYNCIFY_IMPORTS[:] settings.ASYNCIFY_IMPORTS += ['*.' 
+ x for x in js_info['asyncFuncs']] - phase_link(linker_arguments, wasm_target, js_syms) + base_metadata = phase_link(linker_arguments, wasm_target, js_syms) # Special handling for when the user passed '-Wl,--version'. In this case the linker # does not create the output file, but just prints its version and exits with 0. @@ -3099,6 +3117,6 @@ def add_js_deps(sym): # Perform post-link steps (unless we are running bare mode) if options.oformat != OFormat.BARE: - phase_post_link(options, state, wasm_target, wasm_target, target, js_syms) + phase_post_link(options, state, wasm_target, wasm_target, target, js_syms, base_metadata) return 0 From 114247c5fb0741db04abce23daad4a4209fbcf5d Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Thu, 18 Apr 2024 14:03:19 -0700 Subject: [PATCH 2/5] feedback --- tools/link.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/link.py b/tools/link.py index dd13d460600fe..b516d43085e20 100644 --- a/tools/link.py +++ b/tools/link.py @@ -1851,14 +1851,14 @@ def phase_link(linker_arguments, wasm_target, js_syms): rtn = None if settings.LINKABLE: # In LINKABLE mode we pass `--export-dynamic` along with `--whole-archive`. This results - # over 7000 exports, which cannot be distingished from the few symbols we explicitly + # in over 7000 exports, which cannot be distinguished from the few symbols we explicitly # export via EMSCRIPTEN_KEEPALIVE or EXPORTED_FUNCTIONS. - # In order to be able limit the number of symbols we export on the `Module` object we - # run the linker twice in this mode. + # In order to avoid unnecessary exported symbols on the `Module` object we run the linker + # twice in this mode: # 1. Without `--export-dynamic` to get the base exports # 2. With `--export-dynamic` to get the actual linkable Wasm binary # TODO(sbc): Remove this double execution of wasm-ld if we ever find a way to - # distingiush EMSCRIPTEN_KEEPALIVE exports from `--export-dynamic` exports. 
+ # distinguish EMSCRIPTEN_KEEPALIVE exports from `--export-dynamic` exports. settings.LINKABLE = False building.link_lld(linker_arguments, wasm_target, external_symbols=js_syms) settings.LINKABLE = True From 77c5810013f0bd0ef3b6effccbe9fb68a123a3b1 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Thu, 18 Apr 2024 14:14:16 -0700 Subject: [PATCH 3/5] update changelog --- ChangeLog.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index 280f0e9bc0fe1..2c39936677144 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -20,6 +20,13 @@ See docs/process.md for more on how version tagging works. 3.1.58 (in development) ----------------------- +- The `-sMAIN_MODULE=1` mode no longer exports all the main module symbols on + the `Module` object. This saves a huge amount of generated JS code due to the fact + that `-sMAIN_MODULE=1` includes *all* native symbols in your program as well + as from the standard library. The generated JS code for a simple program + in this mode is reduced from 3.3mb to 0.5mb. The current implementation + of this feature requires wasm-ld to be run on the program twice which could have a + noticeable effect on link times. (#21785) - In `-sMODULARIZE` mode, the argument passed into the module constructor is no longer mutated in place. The expectation is that the module instance will be available via the constructor return value.
Attempting to access methods From f9fc346bf505b87dc185aa05c171ec57a70e9747 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Thu, 18 Apr 2024 15:03:06 -0700 Subject: [PATCH 4/5] fix test --- tools/link.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/link.py b/tools/link.py index b516d43085e20..ac8a13f94d8d0 100644 --- a/tools/link.py +++ b/tools/link.py @@ -1849,7 +1849,7 @@ def phase_link(linker_arguments, wasm_target, js_syms): settings.EXPORT_IF_DEFINED = dedup_list(settings.EXPORT_IF_DEFINED) rtn = None - if settings.LINKABLE: + if settings.LINKABLE and not settings.EXPORT_ALL: # In LINKABLE mode we pass `--export-dynamic` along with `--whole-archive`. This results # in over 7000 exports, which cannot be distinguished from the few symbols we explicitly # export via EMSCRIPTEN_KEEPALIVE or EXPORTED_FUNCTIONS. From 61f9a9ed18915b7524379276047f38d7fad6161c Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Thu, 18 Apr 2024 18:06:41 -0700 Subject: [PATCH 5/5] fix --- tools/emscripten.py | 9 ++++++--- tools/link.py | 29 ++++++++++++++++------------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/tools/emscripten.py b/tools/emscripten.py index f8d5fb8496879..87e999080ae07 100644 --- a/tools/emscripten.py +++ b/tools/emscripten.py @@ -128,7 +128,7 @@ def align_memory(addr): return (addr + 15) & -16 -def update_settings_glue(wasm_file, metadata): +def update_settings_glue(wasm_file, metadata, base_metadata): maybe_disable_filesystem(metadata.imports) # Integrate info from backend @@ -142,7 +142,10 @@ def update_settings_glue(wasm_file, metadata): if settings.MAIN_MODULE: settings.WEAK_IMPORTS += webassembly.get_weak_imports(wasm_file) - settings.WASM_EXPORTS = metadata.all_exports + if base_metadata: + settings.WASM_EXPORTS = base_metadata.all_exports + else: + settings.WASM_EXPORTS = metadata.all_exports settings.WASM_GLOBAL_EXPORTS = list(metadata.global_exports.keys()) settings.HAVE_EM_ASM = bool(settings.MAIN_MODULE or 
len(metadata.em_asm_consts) != 0) @@ -332,7 +335,7 @@ def emscript(in_wasm, out_wasm, outfile_js, js_syms, finalize=True, base_metadat # If the binary has already been finalized the settings have already been # updated and we can skip updating them. if finalize: - update_settings_glue(out_wasm, metadata) + update_settings_glue(out_wasm, metadata, base_metadata) if not settings.WASM_BIGINT and metadata.em_js_funcs: import_map = {} diff --git a/tools/link.py b/tools/link.py index ac8a13f94d8d0..898903e1049ed 100644 --- a/tools/link.py +++ b/tools/link.py @@ -3086,19 +3086,22 @@ def run(linker_inputs, options, state, newargs): js_info = get_js_sym_info() if not settings.SIDE_MODULE: js_syms = js_info['deps'] - - def add_js_deps(sym): - if sym in js_syms: - native_deps = js_syms[sym] - if native_deps: - settings.REQUIRED_EXPORTS += native_deps - - for sym in settings.DEFAULT_LIBRARY_FUNCS_TO_INCLUDE: - add_js_deps(sym) - for sym in js_info['extraLibraryFuncs']: - add_js_deps(sym) - for sym in settings.EXPORTED_RUNTIME_METHODS: - add_js_deps(shared.demangle_c_symbol_name(sym)) + if settings.LINKABLE: + for native_deps in js_syms.values(): + settings.REQUIRED_EXPORTS += native_deps + else: + def add_js_deps(sym): + if sym in js_syms: + native_deps = js_syms[sym] + if native_deps: + settings.REQUIRED_EXPORTS += native_deps + + for sym in settings.DEFAULT_LIBRARY_FUNCS_TO_INCLUDE: + add_js_deps(sym) + for sym in js_info['extraLibraryFuncs']: + add_js_deps(sym) + for sym in settings.EXPORTED_RUNTIME_METHODS: + add_js_deps(shared.demangle_c_symbol_name(sym)) if settings.ASYNCIFY: settings.ASYNCIFY_IMPORTS_EXCEPT_JS_LIBS = settings.ASYNCIFY_IMPORTS[:] settings.ASYNCIFY_IMPORTS += ['*.' + x for x in js_info['asyncFuncs']]