From 5ea876593af4e9232fc477aa10a0bc36c90f8726 Mon Sep 17 00:00:00 2001 From: Ryan Lester Date: Mon, 12 Jun 2017 23:32:17 -0400 Subject: [PATCH 01/78] Add SINGLE_FILE option to embed all subresources into emitted JS As discussed in #5279, subresource paths are converted into base64 data URIs. --- emcc.py | 32 +++++++++++++++++++++----------- src/settings.js | 7 +++++++ 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/emcc.py b/emcc.py index 654d26cda16f..2d16e0879c44 100755 --- a/emcc.py +++ b/emcc.py @@ -27,7 +27,7 @@ if __name__ == '__main__': ToolchainProfiler.record_process_start() -import os, sys, shutil, tempfile, subprocess, shlex, time, re, logging, urllib +import os, sys, shutil, tempfile, subprocess, shlex, time, re, logging, urllib, base64 from subprocess import PIPE from tools import shared, jsrun, system_libs from tools.shared import execute, suffix, unsuffixed, unsuffixed_basename, WINDOWS, safe_move @@ -513,6 +513,16 @@ def filter_emscripten_options(argv): # ---------------- Utilities --------------- + # Returns the subresource location for run-time access + def get_subresource_location(path): + if shared.Settings.SINGLE_FILE: + f = open(path, 'rb') + data = base64.b64encode(f.read()) + f.close() + return 'data:application/octet-stream;base64,' + data + else: + return os.path.basename(path) + seen_names = {} def uniquename(name): if name not in seen_names: @@ -799,7 +809,7 @@ def detect_fixed_language_mode(args): options.separate_asm = True logging.warning('forcing separate asm output (--separate-asm), because -s PRECISE_F32=2 or -s USE_PTHREADS=2 was passed.') if options.separate_asm: - shared.Settings.SEPARATE_ASM = os.path.basename(asm_target) + shared.Settings.SEPARATE_ASM = get_subresource_location(asm_target) if 'EMCC_STRICT' in os.environ: shared.Settings.STRICT = os.environ.get('EMCC_STRICT') != '0' @@ -1116,9 +1126,9 @@ def check(input_file): if shared.Settings.BINARYEN: # set file locations, so that JS glue can find what it needs - shared.Settings.WASM_TEXT_FILE = os.path.basename(wasm_text_target) - shared.Settings.WASM_BINARY_FILE = os.path.basename(wasm_binary_target) - shared.Settings.ASMJS_CODE_FILE = os.path.basename(asm_target) + shared.Settings.WASM_TEXT_FILE = get_subresource_location(wasm_text_target) + shared.Settings.WASM_BINARY_FILE = get_subresource_location(wasm_binary_target) + shared.Settings.ASMJS_CODE_FILE = get_subresource_location(asm_target) shared.Settings.ASM_JS = 2 # when targeting wasm, we use a wasm Memory, but that is not compatible with asm.js opts shared.Settings.GLOBAL_BASE = 1024 # leave some room for mapping global vars @@ -1624,12 +1634,12 @@ def repl(m): # Copy into temp dir as well, so can be run there too shared.safe_copy(memfile, os.path.join(shared.get_emscripten_temp_dir(), os.path.basename(memfile))) if not shared.Settings.BINARYEN: - return 'memoryInitializer = "%s";' % os.path.basename(memfile) + return 'memoryInitializer = "%s";' % get_subresource_location(memfile) else: # with wasm, we may have the mem init file in the wasm binary already return ('memoryInitializer = Module["wasmJSMethod"].indexOf("asmjs") >= 0 || ' 'Module["wasmJSMethod"].indexOf("interpret-asm2wasm") >= 0 ? "%s" : null;' - % os.path.basename(memfile)) + % get_subresource_location(memfile)) src = re.sub(shared.JS.memory_initializer_pattern, repl, open(final).read(), count=1) open(final + '.mem.js', 'w').write(src) final += '.mem.js' @@ -2412,7 +2422,7 @@ def generate_html(target, options, js_target, target_basename, meminitXHR.responseType = 'arraybuffer'; meminitXHR.send(null); })(); -''' % os.path.basename(memfile)) + script.inline +''' % get_subresource_location(memfile)) + script.inline # Download .asm.js if --separate-asm was passed in an asm.js build, or if 'asmjs' is one # of the wasm run methods. @@ -2431,7 +2441,7 @@ def generate_html(target, options, js_target, target_basename, }, 1); // delaying even 1ms is enough to allow compilation memory to be reclaimed }; document.body.appendChild(script); -''' % (os.path.basename(asm_target), script.inline) +''' % (get_subresource_location(asm_target), script.inline) else: # may need to modify the asm code, load it as text, modify, and load asynchronously script.inline = ''' @@ -2454,7 +2464,7 @@ def generate_html(target, options, js_target, target_basename, document.body.appendChild(script); }; codeXHR.send(null); -''' % (os.path.basename(asm_target), '\n'.join(asm_mods), script.inline) +''' % (get_subresource_location(asm_target), '\n'.join(asm_mods), script.inline) if shared.Settings.BINARYEN and not shared.Settings.BINARYEN_ASYNC_COMPILATION: # We need to load the wasm file before anything else, it has to be synchronously ready TODO: optimize @@ -2468,7 +2478,7 @@ def generate_html(target, options, js_target, target_basename, %s }; wasmXHR.send(null); -''' % (os.path.basename(wasm_binary_target), script.inline) +''' % (get_subresource_location(wasm_binary_target), script.inline) html = open(target, 'wb') html_contents = shell.replace('{{{ SCRIPT }}}', script.replacement()) diff --git a/src/settings.js b/src/settings.js index c4c668e78c7e..d25ad52ad6fb 100644 --- a/src/settings.js +++ b/src/settings.js @@ -856,6 +856,13 @@ var FETCH = 0; // If nonzero, enables emscripten_fetch API. var ASMFS = 0; // If set to 1, uses the multithreaded filesystem that is implemented within the asm.js module, using emscripten_fetch. Implies -s FETCH=1. +var SINGLE_FILE = 0; // If set to 1, embeds all subresources in the emitted JS file + // by converting their file names into base64 data URIs. + // + // Note that using this option may require a change to consuming + // pages' Content Security Policies -- specifically, adding data: + // to their connect-src directives. + var WASM_TEXT_FILE = ''; // name of the file containing wasm text, if relevant var WASM_BINARY_FILE = ''; // name of the file containing wasm binary, if relevant var ASMJS_CODE_FILE = ''; // name of the file containing asm.js, if relevant From 3a562fa81b3bb07899c17cac4a661d95a985961c Mon Sep 17 00:00:00 2001 From: Ryan Lester Date: Tue, 13 Jun 2017 18:08:23 -0400 Subject: [PATCH 02/78] misc cleanup as suggested by @kripken (#5296) --- emcc.py | 32 +++++++++++--------------------- src/settings.js | 4 +++- tools/shared.py | 13 ++++++++++++- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/emcc.py b/emcc.py index 2d16e0879c44..420f69fd444d 100755 --- a/emcc.py +++ b/emcc.py @@ -27,7 +27,7 @@ if __name__ == '__main__': ToolchainProfiler.record_process_start() -import os, sys, shutil, tempfile, subprocess, shlex, time, re, logging, urllib, base64 +import os, sys, shutil, tempfile, subprocess, shlex, time, re, logging, urllib from subprocess import PIPE from tools import shared, jsrun, system_libs from tools.shared import execute, suffix, unsuffixed, unsuffixed_basename, WINDOWS, safe_move @@ -513,16 +513,6 @@ def filter_emscripten_options(argv): # ---------------- Utilities --------------- - # Returns the subresource location for run-time access - def get_subresource_location(path): - if shared.Settings.SINGLE_FILE: - f = open(path, 'rb') - data = base64.b64encode(f.read()) - f.close() - return 'data:application/octet-stream;base64,' + data - else: - return os.path.basename(path) - seen_names = {} def uniquename(name): if name not in seen_names: @@ -809,7 +799,7 @@ def detect_fixed_language_mode(args): options.separate_asm = True logging.warning('forcing separate asm output (--separate-asm), because -s PRECISE_F32=2 or -s USE_PTHREADS=2 was passed.') if options.separate_asm: - shared.Settings.SEPARATE_ASM = get_subresource_location(asm_target) + shared.Settings.SEPARATE_ASM = shared.JS.get_subresource_location(asm_target) if 'EMCC_STRICT' in os.environ: shared.Settings.STRICT = os.environ.get('EMCC_STRICT') != '0' @@ -1126,9 +1116,9 @@ def check(input_file): if shared.Settings.BINARYEN: # set file locations, so that JS glue can find what it needs - shared.Settings.WASM_TEXT_FILE = get_subresource_location(wasm_text_target) - shared.Settings.WASM_BINARY_FILE = get_subresource_location(wasm_binary_target) - shared.Settings.ASMJS_CODE_FILE = get_subresource_location(asm_target) + shared.Settings.WASM_TEXT_FILE = shared.JS.get_subresource_location(wasm_text_target) + shared.Settings.WASM_BINARY_FILE = shared.JS.get_subresource_location(wasm_binary_target) + shared.Settings.ASMJS_CODE_FILE = shared.JS.get_subresource_location(asm_target) shared.Settings.ASM_JS = 2 # when targeting wasm, we use a wasm Memory, but that is not compatible with asm.js opts shared.Settings.GLOBAL_BASE = 1024 # leave some room for mapping global vars @@ -1634,12 +1624,12 @@ def repl(m): # Copy into temp dir as well, so can be run there too shared.safe_copy(memfile, os.path.join(shared.get_emscripten_temp_dir(), os.path.basename(memfile))) if not shared.Settings.BINARYEN: - return 'memoryInitializer = "%s";' % get_subresource_location(memfile) + return 'memoryInitializer = "%s";' % shared.JS.get_subresource_location(memfile) else: # with wasm, we may have the mem init file in the wasm binary already return ('memoryInitializer = Module["wasmJSMethod"].indexOf("asmjs") >= 0 || ' 'Module["wasmJSMethod"].indexOf("interpret-asm2wasm") >= 0 ? "%s" : null;' - % get_subresource_location(memfile)) + % shared.JS.get_subresource_location(memfile)) src = re.sub(shared.JS.memory_initializer_pattern, repl, open(final).read(), count=1) open(final + '.mem.js', 'w').write(src) final += '.mem.js' @@ -2422,7 +2412,7 @@ def generate_html(target, options, js_target, target_basename, meminitXHR.responseType = 'arraybuffer'; meminitXHR.send(null); })(); -''' % get_subresource_location(memfile)) + script.inline +''' % shared.JS.get_subresource_location(memfile)) + script.inline # Download .asm.js if --separate-asm was passed in an asm.js build, or if 'asmjs' is one # of the wasm run methods. @@ -2441,7 +2431,7 @@ def generate_html(target, options, js_target, target_basename, }, 1); // delaying even 1ms is enough to allow compilation memory to be reclaimed }; document.body.appendChild(script); -''' % (get_subresource_location(asm_target), script.inline) +''' % (shared.JS.get_subresource_location(asm_target), script.inline) else: # may need to modify the asm code, load it as text, modify, and load asynchronously script.inline = ''' @@ -2464,7 +2454,7 @@ def generate_html(target, options, js_target, target_basename, document.body.appendChild(script); }; codeXHR.send(null); -''' % (get_subresource_location(asm_target), '\n'.join(asm_mods), script.inline) +''' % (shared.JS.get_subresource_location(asm_target), '\n'.join(asm_mods), script.inline) if shared.Settings.BINARYEN and not shared.Settings.BINARYEN_ASYNC_COMPILATION: # We need to load the wasm file before anything else, it has to be synchronously ready TODO: optimize @@ -2478,7 +2468,7 @@ def generate_html(target, options, js_target, target_basename, %s }; wasmXHR.send(null); -''' % (get_subresource_location(wasm_binary_target), script.inline) +''' % (shared.JS.get_subresource_location(wasm_binary_target), script.inline) html = open(target, 'wb') html_contents = shell.replace('{{{ SCRIPT }}}', script.replacement()) diff --git a/src/settings.js b/src/settings.js index d25ad52ad6fb..3afa115dd346 100644 --- a/src/settings.js +++ b/src/settings.js @@ -857,7 +857,9 @@ var FETCH = 0; // If nonzero, enables emscripten_fetch API. var ASMFS = 0; // If set to 1, uses the multithreaded filesystem that is implemented within the asm.js module, using emscripten_fetch. Implies -s FETCH=1. var SINGLE_FILE = 0; // If set to 1, embeds all subresources in the emitted JS file - // by converting their file names into base64 data URIs. + // by converting their file names into base64 data URIs. Embedded + // subresources may include (but aren't limited to) wasm, asm.js, + // and static memory initialization code. // // Note that using this option may require a change to consuming // pages' Content Security Policies -- specifically, adding data: diff --git a/tools/shared.py b/tools/shared.py index e56cb31897a0..38cdf0951e47 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -1,5 +1,5 @@ from toolchain_profiler import ToolchainProfiler -import shutil, time, os, sys, json, tempfile, copy, shlex, atexit, subprocess, hashlib, cPickle, re, errno +import shutil, time, os, sys, base64, json, tempfile, copy, shlex, atexit, subprocess, hashlib, cPickle, re, errno from subprocess import Popen, PIPE, STDOUT from tempfile import mkstemp from distutils.spawn import find_executable @@ -2277,6 +2277,17 @@ class JS: def to_nice_ident(ident): # limited version of the JS function toNiceIdent return ident.replace('%', '$').replace('@', '_').replace('.', '_') + # Returns the subresource location for run-time access + @staticmethod + def get_subresource_location(path): + if Settings.SINGLE_FILE: + f = open(path, 'rb') + data = base64.b64encode(f.read()) + f.close() + return 'data:application/octet-stream;base64,' + data + else: + return os.path.basename(path) + @staticmethod def make_initializer(sig, settings=None): settings = settings or Settings From d2edd4c8b8e54b4428cea0656273c6267db23e4e Mon Sep 17 00:00:00 2001 From: Ryan Lester Date: Tue, 13 Jun 2017 18:57:03 -0400 Subject: [PATCH 03/78] replace --memory-init-file 0 logic with SINGLE_FILE-style embedding (#5296) --- emcc.py | 17 ++++------- src/postamble.js | 2 -- tools/shared.py | 73 ++---------------------------------------------- 3 files changed, 7 insertions(+), 85 deletions(-) diff --git a/emcc.py b/emcc.py index 420f69fd444d..db560c6a18b2 100755 --- a/emcc.py +++ b/emcc.py @@ -1605,7 +1605,9 @@ def get_final(): with ToolchainProfiler.profile_block('memory initializer'): memfile = None - if shared.Settings.MEM_INIT_METHOD > 0: + embed_memfile = shared.Settings.MEM_INIT_METHOD == 0 and (not shared.Settings.MAIN_MODULE and not shared.Settings.SIDE_MODULE and options.debug_level < 4) + + if shared.Settings.MEM_INIT_METHOD > 0 or embed_memfile: memfile = target + '.mem' shared.try_delete(memfile) def repl(m): @@ -1624,12 +1626,12 @@ def repl(m): # Copy into temp dir as well, so can be run there too shared.safe_copy(memfile, os.path.join(shared.get_emscripten_temp_dir(), os.path.basename(memfile))) if not shared.Settings.BINARYEN: - return 'memoryInitializer = "%s";' % shared.JS.get_subresource_location(memfile) + return 'memoryInitializer = "%s";' % shared.JS.get_subresource_location(memfile, embed_memfile) else: # with wasm, we may have the mem init file in the wasm binary already return ('memoryInitializer = Module["wasmJSMethod"].indexOf("asmjs") >= 0 || ' 'Module["wasmJSMethod"].indexOf("interpret-asm2wasm") >= 0 ? "%s" : null;' - % shared.JS.get_subresource_location(memfile)) + % shared.JS.get_subresource_location(memfile, embed_memfile)) src = re.sub(shared.JS.memory_initializer_pattern, repl, open(final).read(), count=1) open(final + '.mem.js', 'w').write(src) final += '.mem.js' @@ -1641,15 +1643,6 @@ def repl(m): logging.debug('wrote memory initialization to %s', memfile) else: logging.debug('did not see memory initialization') - elif not shared.Settings.MAIN_MODULE and not shared.Settings.SIDE_MODULE and options.debug_level < 4: - # not writing a binary init, but we can at least optimize them by splitting them up - src = open(final).read() - src = shared.JS.optimize_initializer(src) - if src is not None: - logging.debug('optimizing memory initialization') - open(final + '.mem.js', 'w').write(src) - final += '.mem.js' - src = None if shared.Settings.USE_PTHREADS: target_dir = os.path.dirname(os.path.abspath(target)) diff --git a/src/postamble.js b/src/postamble.js index d711240c721c..10de96879a5f 100644 --- a/src/postamble.js +++ b/src/postamble.js @@ -33,7 +33,6 @@ if (memoryInitializer) (function(s) { } })(memoryInitializer); #else -#if MEM_INIT_METHOD == 1 #if USE_PTHREADS if (memoryInitializer && !ENVIRONMENT_IS_PTHREAD) { #else @@ -94,7 +93,6 @@ if (memoryInitializer) { } } #endif -#endif #if CYBERDWARF Module['cyberdwarf'] = _cyberdwarf_Debugger(cyberDWARFFile); diff --git a/tools/shared.py b/tools/shared.py index 38cdf0951e47..15808dd9e7f9 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -2279,8 +2279,8 @@ def to_nice_ident(ident): # limited version of the JS function toNiceIdent # Returns the subresource location for run-time access @staticmethod - def get_subresource_location(path): - if Settings.SINGLE_FILE: + def get_subresource_location(path, force_data_uri=False): + if Settings.SINGLE_FILE or force_data_uri: f = open(path, 'rb') data = base64.b64encode(f.read()) f.close() @@ -2400,75 +2400,6 @@ def align(x, by): while x % by != 0: x += 1 return x - INITIALIZER_CHUNK_SIZE = 10240 - - @staticmethod - def collect_initializers(src): - ret = [] - max_offset = -1 - for init in re.finditer(JS.memory_initializer_pattern, src): - contents = init.group(1).split(',') - offset = sum([int(x) if x[0] != 'R' else 0 for x in init.group(2).split('+')]) - ret.append((offset, contents)) - assert offset > max_offset - max_offset = offset - return ret - - @staticmethod - def split_initializer(contents): - # given a memory initializer (see memory_initializer_pattern), split it up into multiple initializers to avoid long runs of zeros or a single overly-large allocator - ret = [] - l = len(contents) - maxx = JS.INITIALIZER_CHUNK_SIZE - i = 0 - start = 0 - while 1: - if i - start >= maxx or (i > start and i == l): - #print >> sys.stderr, 'new', start, i-start - ret.append((start, contents[start:i])) - start = i - if i == l: break - if contents[i] != '0': - i += 1 - else: - # look for a sequence of zeros - j = i + 1 - while j < l and contents[j] == '0': j += 1 - if j-i > maxx/10 or j-start >= maxx: - #print >> sys.stderr, 'skip', start, i-start, j-start - ret.append((start, contents[start:i])) # skip over the zeros starting at i and ending at j - start = j - i = j - return ret - - @staticmethod - def replace_initializers(src, inits): - class State: - first = True - def rep(m): - if not State.first: return '' - # write out all the new initializers in place of the first old one - State.first = False - def gen_init(init): - offset, contents = init - return '/* memory initializer */ allocate([%s], "i8", ALLOC_NONE, Runtime.GLOBAL_BASE%s);' % ( - ','.join(contents), - '' if offset == 0 else ('+%d' % offset) - ) - return '\n'.join(map(gen_init, inits)) - return re.sub(JS.memory_initializer_pattern, rep, src) - - @staticmethod - def optimize_initializer(src): - inits = JS.collect_initializers(src) - if len(inits) == 0: return None - assert len(inits) == 1 - init = inits[0] - offset, contents = init - assert offset == 0 # offset 0, singleton - if len(contents) <= JS.INITIALIZER_CHUNK_SIZE: return None - return JS.replace_initializers(src, JS.split_initializer(contents)) - @staticmethod def generate_string_initializer(s): if Settings.ASSERTIONS: From e9b81ccd7cc7c07f4b6130cdbbf95b7e532746f3 Mon Sep 17 00:00:00 2001 From: Ryan Lester Date: Tue, 13 Jun 2017 20:29:45 -0400 Subject: [PATCH 04/78] parse data URIs in JS to resolve cross-environment and CSP issues (#5296) --- src/jsifier.js | 5 +++- src/settings.js | 4 --- src/shell.js | 68 ++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 66 insertions(+), 11 deletions(-) diff --git a/src/jsifier.js b/src/jsifier.js index 7522d471bf78..f58b596be346 100644 --- a/src/jsifier.js +++ b/src/jsifier.js @@ -26,6 +26,8 @@ function JSify(data, functionsOnly) { if (mainPass) { var shellFile = SHELL_FILE ? SHELL_FILE : (BUILD_AS_SHARED_LIB || SIDE_MODULE ? 'shell_sharedlib.js' : 'shell.js'); + var thirdPartyFiles = ['../third_party/sodiumutil/dist/sodiumutil.js']; + // We will start to print out the data, but must do so carefully - we are // dealing with potentially *huge* strings. Convenient replacements and // manipulations may create in-memory copies, and we may OOM. @@ -45,7 +47,8 @@ function JSify(data, functionsOnly) { // else. This lets us not hold any strings in memory, we simply print // things out as they are ready. - var shellParts = read(shellFile).split('{{BODY}}'); + var thirdParty = thirdPartyFiles.map(function(f) { return read(f) }).join('\n'); + var shellParts = read(shellFile).replace('{{THIRD_PARTY}}', thirdParty).split('{{BODY}}'); print(processMacros(preprocess(shellParts[0], shellFile))); var preFile = BUILD_AS_SHARED_LIB || SIDE_MODULE ? 'preamble_sharedlib.js' : 'preamble.js'; var pre = processMacros(preprocess(read(preFile).replace('{{RUNTIME}}', getRuntime()), preFile)); diff --git a/src/settings.js b/src/settings.js index 3afa115dd346..4555e1cf05de 100644 --- a/src/settings.js +++ b/src/settings.js @@ -860,10 +860,6 @@ var SINGLE_FILE = 0; // If set to 1, embeds all subresources in the emitted JS f // by converting their file names into base64 data URIs. Embedded // subresources may include (but aren't limited to) wasm, asm.js, // and static memory initialization code. - // - // Note that using this option may require a change to consuming - // pages' Content Security Policies -- specifically, adding data: - // to their connect-src directives. var WASM_TEXT_FILE = ''; // name of the file containing wasm text, if relevant var WASM_BINARY_FILE = ''; // name of the file containing wasm binary, if relevant diff --git a/src/shell.js b/src/shell.js index 789de77778b9..0da0fe1f257e 100644 --- a/src/shell.js +++ b/src/shell.js @@ -71,6 +71,8 @@ if (!ENVIRONMENT_IS_PTHREAD) PthreadWorkerInit = {}; var currentScriptUrl = ENVIRONMENT_IS_WORKER ? undefined : document.currentScript.src; #endif +{{THIRD_PARTY}} + if (ENVIRONMENT_IS_NODE) { // Expose functionality in the same simple way that the shells work // Note that we pollute the global namespace here, otherwise we break in node @@ -81,10 +83,13 @@ if (ENVIRONMENT_IS_NODE) { var nodePath; Module['read'] = function shell_read(filename, binary) { - if (!nodeFS) nodeFS = require('fs'); - if (!nodePath) nodePath = require('path'); - filename = nodePath['normalize'](filename); - var ret = nodeFS['readFileSync'](filename); + var ret = parseDataURI(filename); + if (!ret) { + if (!nodeFS) nodeFS = require('fs'); + if (!nodePath) nodePath = require('path'); + filename = nodePath['normalize'](filename); + ret = nodeFS['readFileSync'](filename); + } return binary ? ret : ret.toString(); }; @@ -131,16 +136,26 @@ else if (ENVIRONMENT_IS_SHELL) { if (typeof printErr != 'undefined') Module['printErr'] = printErr; // not present in v8 or older sm if (typeof read != 'undefined') { - Module['read'] = read; + Module['read'] = function shell_read(f) { + var data = parseDataURI(f); + if (data) { + return sodiumUtil.to_string(data); + } + return read(f); + }; } else { Module['read'] = function shell_read() { throw 'no read() available' }; } Module['readBinary'] = function readBinary(f) { + var data = parseDataURI(f); + if (data) { + return data; + } if (typeof readbuffer === 'function') { return new Uint8Array(readbuffer(f)); } - var data = read(f, 'binary'); + data = read(f, 'binary'); assert(typeof data === 'object'); return data; }; @@ -163,6 +178,10 @@ else if (ENVIRONMENT_IS_SHELL) { } else if (ENVIRONMENT_IS_WEB || ENVIRONMENT_IS_WORKER) { Module['read'] = function shell_read(url) { + var data = parseDataURI(url); + if (data) { + return sodiumUtil.to_string(data); + } var xhr = new XMLHttpRequest(); xhr.open('GET', url, false); xhr.send(null); @@ -171,6 +190,10 @@ else if (ENVIRONMENT_IS_WEB || ENVIRONMENT_IS_WORKER) { if (ENVIRONMENT_IS_WORKER) { Module['readBinary'] = function readBinary(url) { + var data = parseDataURI(f); + if (data) { + return data; + } var xhr = new XMLHttpRequest(); xhr.open('GET', url, false); xhr.responseType = 'arraybuffer'; @@ -180,6 +203,17 @@ else if (ENVIRONMENT_IS_WEB || ENVIRONMENT_IS_WORKER) { } Module['readAsync'] = function readAsync(url, onload, onerror) { + try { + var data = parseDataURI(url); + if (data) { + setTimeout(function () { onload(data.buffer); }, 0); + return; + } + } + catch (err) { + setTimeout(function () { onerror(err); }, 0); + return; + } var xhr = new XMLHttpRequest(); xhr.open('GET', url, true); xhr.responseType = 'arraybuffer'; @@ -228,6 +262,28 @@ else { throw 'Unknown runtime environment. Where are we?'; } +// If filename is a base64 data URI, parses and returns data (Buffer on node, +// Uint8Array otherwise). If filename is not a base64 data URI, returns undefined. +function parseDataURI(filename) { + var dataURIPrefix = 'data:application/octet-stream;base64,'; + + if (!( + String.prototype.startsWith ? + filename.startsWith(dataURIPrefix) : + filename.indexOf(dataURIPrefix) === 0 + )) { + return; + } + + var data = filename.slice(dataURIPrefix.length); + + if (ENVIRONMENT_IS_NODE) { + return Buffer.from(data, 'base64'); + } + + return sodiumUtil.from_base64(data); +} + function globalEval(x) { {{{ makeEval('eval.call(null, x);') }}} } From cb300b69b9ddf005704f189f2376523811f19bcb Mon Sep 17 00:00:00 2001 From: Ryan Lester Date: Wed, 14 Jun 2017 01:49:34 -0400 Subject: [PATCH 05/78] minor cleanup (#5296) --- emcc.py | 4 +++- src/shell.js | 14 +++++++------- tools/shared.py | 4 ++-- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/emcc.py b/emcc.py index db560c6a18b2..8dd65e646c16 100755 --- a/emcc.py +++ b/emcc.py @@ -1605,7 +1605,7 @@ def get_final(): with ToolchainProfiler.profile_block('memory initializer'): memfile = None - embed_memfile = shared.Settings.MEM_INIT_METHOD == 0 and (not shared.Settings.MAIN_MODULE and not shared.Settings.SIDE_MODULE and options.debug_level < 4) + embed_memfile = (Settings.SINGLE_FILE or shared.Settings.MEM_INIT_METHOD == 0) and (not shared.Settings.MAIN_MODULE and not shared.Settings.SIDE_MODULE and options.debug_level < 4) if shared.Settings.MEM_INIT_METHOD > 0 or embed_memfile: memfile = target + '.mem' @@ -1637,6 +1637,8 @@ def repl(m): final += '.mem.js' src = None js_transform_tempfiles[-1] = final # simple text substitution preserves comment line number mappings + if embed_memfile: + os.remove(memfile) if DEBUG: if os.path.exists(memfile): save_intermediate('meminit') diff --git a/src/shell.js b/src/shell.js index 0da0fe1f257e..949b11e0f35f 100644 --- a/src/shell.js +++ b/src/shell.js @@ -83,7 +83,7 @@ if (ENVIRONMENT_IS_NODE) { var nodePath; Module['read'] = function shell_read(filename, binary) { - var ret = parseDataURI(filename); + var ret = tryParseAsDataURI(filename); if (!ret) { if (!nodeFS) nodeFS = require('fs'); if (!nodePath) nodePath = require('path'); @@ -137,7 +137,7 @@ else if (ENVIRONMENT_IS_SHELL) { if (typeof read != 'undefined') { Module['read'] = function shell_read(f) { - var data = parseDataURI(f); + var data = tryParseAsDataURI(f); if (data) { return sodiumUtil.to_string(data); } @@ -148,7 +148,7 @@ else if (ENVIRONMENT_IS_SHELL) { } Module['readBinary'] = function readBinary(f) { - var data = parseDataURI(f); + var data = tryParseAsDataURI(f); if (data) { return data; } @@ -178,7 +178,7 @@ else if (ENVIRONMENT_IS_SHELL) { } else if (ENVIRONMENT_IS_WEB || ENVIRONMENT_IS_WORKER) { Module['read'] = function shell_read(url) { - var data = parseDataURI(url); + var data = tryParseAsDataURI(url); if (data) { return sodiumUtil.to_string(data); } @@ -190,7 +190,7 @@ else if (ENVIRONMENT_IS_WEB || ENVIRONMENT_IS_WORKER) { if (ENVIRONMENT_IS_WORKER) { Module['readBinary'] = function readBinary(url) { - var data = parseDataURI(f); + var data = tryParseAsDataURI(f); if (data) { return data; } @@ -204,7 +204,7 @@ else if (ENVIRONMENT_IS_WEB || ENVIRONMENT_IS_WORKER) { Module['readAsync'] = function readAsync(url, onload, onerror) { try { - var data = parseDataURI(url); + var data = tryParseAsDataURI(url); if (data) { setTimeout(function () { onload(data.buffer); }, 0); return; @@ -264,7 +264,7 @@ else { // If filename is a base64 data URI, parses and returns data (Buffer on node, // Uint8Array otherwise). If filename is not a base64 data URI, returns undefined. -function parseDataURI(filename) { +function tryParseAsDataURI(filename) { var dataURIPrefix = 'data:application/octet-stream;base64,'; if (!( diff --git a/tools/shared.py b/tools/shared.py index 15808dd9e7f9..404ea97344fb 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -2279,8 +2279,8 @@ def to_nice_ident(ident): # limited version of the JS function toNiceIdent # Returns the subresource location for run-time access @staticmethod - def get_subresource_location(path, force_data_uri=False): - if Settings.SINGLE_FILE or force_data_uri: + def get_subresource_location(path, data_uri=Settings.SINGLE_FILE): + if data_uri: f = open(path, 'rb') data = base64.b64encode(f.read()) f.close() From 2e33fb00e9482fe1a83b73a3f6f0316d91da8d1b Mon Sep 17 00:00:00 2001 From: Ryan Lester Date: Wed, 14 Jun 2017 11:12:17 -0400 Subject: [PATCH 06/78] handle EMTERPRETIFY_FILE (#5296) --- emcc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emcc.py b/emcc.py index 8dd65e646c16..0056f9689561 100755 --- a/emcc.py +++ b/emcc.py @@ -2389,7 +2389,7 @@ def generate_html(target, options, js_target, target_basename, %s }; emterpretXHR.send(null); -''' % (shared.Settings.EMTERPRETIFY_FILE, script.inline) +''' % (shared.JS.get_subresource_location(shared.Settings.EMTERPRETIFY_FILE), script.inline) if options.memory_init_file: # start to load the memory init file in the HTML, in parallel with the JS From 32bee1e954b735f3cefca65b414bd6951bad0c18 Mon Sep 17 00:00:00 2001 From: Ryan Lester Date: Wed, 14 Jun 2017 20:49:40 -0400 Subject: [PATCH 07/78] HTML worker fix (#5296) --- emcc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/emcc.py b/emcc.py index 0056f9689561..2870883b5cc6 100755 --- a/emcc.py +++ b/emcc.py @@ -2364,10 +2364,10 @@ def generate_html(target, options, js_target, target_basename, // note: no support for code mods (PRECISE_F32==2) console.log('running code on the main thread'); var script = document.createElement('script'); - script.src = "%s.js"; + script.src = "%s"; document.body.appendChild(script); } -''' % proxy_worker_filename +''' % shared.JS.get_subresource_location(proxy_worker_filename + '.js') else: # Normal code generation path script.src = base_js_target From a21e1d89d7f5262b01133e536bdaa374b12fb304 Mon Sep 17 00:00:00 2001 From: Ryan Lester Date: Wed, 14 Jun 2017 21:10:59 -0400 Subject: [PATCH 08/78] document SINGLE_FILE + HTML output (#5296) --- src/settings.js | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/settings.js b/src/settings.js index 4555e1cf05de..f0ae35bc5b99 100644 --- a/src/settings.js +++ b/src/settings.js @@ -47,7 +47,9 @@ var INVOKE_RUN = 1; // Whether we will run the main() function. Disable if you e var NO_EXIT_RUNTIME = 0; // If set, the runtime is not quit when main() completes (allowing code to // run afterwards, for example from the browser main event loop). var MEM_INIT_METHOD = 0; // How to represent the initial memory content. - // 0: keep array literal representing the initial memory data + // 0: embed a base64 string literal representing the initial memory data; + // if using this in combination with HTML output, your Content Security + // Policy may need to be modified (see SINGLE_FILE for more details) // 1: create a *.mem file containing the binary data of the initial memory; // use the --memory-init-file command line switch to select this method // 2: embed a string literal representing that initial memory data @@ -856,10 +858,25 @@ var FETCH = 0; // If nonzero, enables emscripten_fetch API. var ASMFS = 0; // If set to 1, uses the multithreaded filesystem that is implemented within the asm.js module, using emscripten_fetch. Implies -s FETCH=1. -var SINGLE_FILE = 0; // If set to 1, embeds all subresources in the emitted JS file +var SINGLE_FILE = 0; // If set to 1, embeds all subresources in the emitted file // by converting their file names into base64 data URIs. Embedded // subresources may include (but aren't limited to) wasm, asm.js, // and static memory initialization code. + // + // MEM_INIT_METHOD 0 uses the same logic as SINGLE_FILE, so the following + // information applies to that as well: + // + // If used when generating JavaScript output, the subresource data URIs + // will be parsed directly in JavaScript for ensured compatibility + // across runtime environments. + // + // If used when generating HTML output, + + + ''') + self.run_browser('a.html', '...', '/report_result?0') From f98d511a96f533d4414d5bafa6028a3eb7046e75 Mon Sep 17 00:00:00 2001 From: Ryan Lester Date: Wed, 11 Oct 2017 20:54:01 -0400 Subject: [PATCH 76/78] HTML generation fix (#5296) --- emcc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emcc.py b/emcc.py index cfa104a85140..bac9e4a66fcb 100755 --- a/emcc.py +++ b/emcc.py @@ -2566,7 +2566,7 @@ def generate_html(target, options, js_target, target_basename, if script.inline: for file in ['src/arrayUtils.js', 'src/base64Utils.js']: f = open(shared.path_from_root(file), 'r') - script.inline = '(function () {' + script.inline + f.read() + '})();' + script.inline = f.read() + script.inline f.close() html = open(target, 'wb') From 42992efa8832b5e475b78f319bf250ea79371319 Mon Sep 17 00:00:00 2001 From: Ryan Lester Date: Thu, 12 Oct 2017 19:00:57 -0400 Subject: [PATCH 77/78] atob function scope fix (#5296) --- src/base64Utils.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/base64Utils.js b/src/base64Utils.js index 480c91b74624..8d8b04e7b701 100644 --- a/src/base64Utils.js +++ b/src/base64Utils.js @@ -8,7 +8,7 @@ var keyStr = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='; -var atob = atob || function (input) { +var decodeBase64 = typeof atob === 'function' ? atob : function (input) { /** * Decodes a base64 string. * @param {String} input The string to decode. @@ -55,7 +55,7 @@ function intArrayFromBase64(s) { } try { - var decoded = atob(s); + var decoded = decodeBase64(s); var bytes = new Uint8Array(decoded.length); for (var i = 0 ; i < decoded.length ; ++i) { bytes[i] = decoded.charCodeAt(i); From 8e45599a58598cb9e3038e19185431a5a4dfb81f Mon Sep 17 00:00:00 2001 From: Ryan Lester Date: Fri, 13 Oct 2017 15:12:55 -0400 Subject: [PATCH 78/78] asm.js synchronous init fix (#5296) --- src/postamble.js | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/postamble.js b/src/postamble.js index 48e0b607611c..ce813ed5b9de 100644 --- a/src/postamble.js +++ b/src/postamble.js @@ -65,16 +65,15 @@ if (memoryInitializer) { } function doBrowserLoad() { Module['readAsync'](memoryInitializer, applyMemoryInitializer, function() { -#if SUPPORT_BASE64_EMBEDDING - var memoryInitializerBytes = tryParseAsDataURI(memoryInitializer); - if (memoryInitializerBytes) { - applyMemoryInitializer(memoryInitializerBytes.buffer); - return; - } -#endif throw 'could not load memory initializer ' + memoryInitializer; }); } +#if SUPPORT_BASE64_EMBEDDING + var memoryInitializerBytes = tryParseAsDataURI(memoryInitializer); + if (memoryInitializerBytes) { + applyMemoryInitializer(memoryInitializerBytes.buffer); + } else +#endif if (Module['memoryInitializerRequest']) { // a network request has already been created, just use that function useRequest() {