From da5fa46ca3d114ca0a503d6a29dd530fe504c6f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sat, 13 Feb 2021 10:52:34 +0200 Subject: [PATCH 01/26] Drop using python multiprocessing pool. --- embuilder.py | 13 ++- tests/runner.py | 2 +- tools/building.py | 263 +----------------------------------------- tools/js_optimizer.py | 19 +-- tools/shared.py | 43 +++++++ tools/system_libs.py | 56 +++++---- 6 files changed, 90 insertions(+), 306 deletions(-) diff --git a/embuilder.py b/embuilder.py index 468ec2e72c7eb..0a4c2830da87e 100755 --- a/embuilder.py +++ b/embuilder.py @@ -15,6 +15,7 @@ import argparse import logging import sys +import time from tools import shared from tools import system_libs @@ -112,6 +113,9 @@ def build_port(port_name): def main(): global force + + all_build_start_time = time.time() + parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, epilog=get_help()) @@ -166,6 +170,7 @@ def main(): print('Building targets: %s' % ' '.join(tasks)) for what in tasks: logger.info('building and verifying ' + what) + start_time = time.time() if what in SYSTEM_LIBRARIES: library = SYSTEM_LIBRARIES[what] if force: @@ -260,7 +265,13 @@ def main(): logger.error('unfamiliar build target: ' + what) return 1 - logger.info('...success') + time_taken = time.time() - start_time + logger.info('...success. Took %s(%.2fs)' % (('%02d:%02d mins ' % (time_taken // 60, time_taken % 60) if time_taken >= 60 else ''), time_taken)) + + if len(tasks) > 1: + all_build_time_taken = time.time() - all_build_start_time + logger.info('Built %d targets in %s(%.2fs)' % (len(tasks), ('%02d:%02d mins ' % (all_build_time_taken // 60, all_build_time_taken % 60) if all_build_time_taken >= 60 else ''), all_build_time_taken)) + return 0 diff --git a/tests/runner.py b/tests/runner.py index 871701598d787..0096597e1f8b7 100755 --- a/tests/runner.py +++ b/tests/runner.py @@ -791,7 +791,7 @@ def get_library(self, name, generated_libs, configure=['sh', './configure'], configure_args=[], make=['make'], make_args=None, env_init={}, cache_name_extra='', native=False): if make_args is None: - make_args = ['-j', str(building.get_num_cores())] + make_args = ['-j', str(shared.get_num_cores())] build_dir = self.get_build_dir() output_dir = self.get_dir() diff --git a/tools/building.py b/tools/building.py index d436174c78e57..ffdcd46931f21 100644 --- a/tools/building.py +++ b/tools/building.py @@ -6,7 +6,6 @@ import atexit import json import logging -import multiprocessing import os import re import shlex @@ -36,7 +35,6 @@ logger = logging.getLogger('building') # Building -multiprocessing_pool = None binaryen_checked = False EXPECTED_BINARYEN_VERSION = 100 @@ -119,15 +117,6 @@ def extract_archive_contents(archive_file): } -def g_multiprocessing_initializer(*args): - for item in args: - (key, value) = item.split('=', 1) - if key == 'EMCC_POOL_CWD': - os.chdir(value) - else: - os.environ[key] = value - - def unique_ordered(values): """return a list of unique values in an input list, without changing order (list(set(.)) would change order randomly). @@ -152,74 +141,6 @@ def clear(): _is_ar_cache.clear() -def get_num_cores(): - return int(os.environ.get('EMCC_CORES', multiprocessing.cpu_count())) - - -# Multiprocessing pools are very slow to build up and tear down, and having -# several pools throughout the application has a problem of overallocating -# child processes. Therefore maintain a single centralized pool that is shared -# between all pooled task invocations. 
-def get_multiprocessing_pool(): - global multiprocessing_pool - if not multiprocessing_pool: - cores = get_num_cores() - - # If running with one core only, create a mock instance of a pool that does not - # actually spawn any new subprocesses. Very useful for internal debugging. - if cores == 1: - class FakeMultiprocessor(object): - def map(self, func, tasks, *args, **kwargs): - results = [] - for t in tasks: - results += [func(t)] - return results - - def map_async(self, func, tasks, *args, **kwargs): - class Result: - def __init__(self, func, tasks): - self.func = func - self.tasks = tasks - - def get(self, timeout): - results = [] - for t in tasks: - results += [func(t)] - return results - - return Result(func, tasks) - - multiprocessing_pool = FakeMultiprocessor() - else: - child_env = [ - # Multiprocessing pool children must have their current working - # directory set to a safe path that is guaranteed not to die in - # between of executing commands, or otherwise the pool children will - # have trouble spawning subprocesses of their own. - 'EMCC_POOL_CWD=' + path_from_root(), - # Multiprocessing pool children can't spawn their own linear number of - # children, that could cause a quadratic amount of spawned processes. - 'EMCC_CORES=1' - ] - multiprocessing_pool = multiprocessing.Pool(processes=cores, initializer=g_multiprocessing_initializer, initargs=child_env) - - def close_multiprocessing_pool(): - global multiprocessing_pool - try: - # Shut down the pool explicitly, because leaving that for Python to do at process shutdown is buggy and can generate - # noisy "WindowsError: [Error 5] Access is denied" spam which is not fatal. - multiprocessing_pool.terminate() - multiprocessing_pool.join() - multiprocessing_pool = None - except OSError as e: - # Mute the "WindowsError: [Error 5] Access is denied" errors, raise all others through - if not (sys.platform.startswith('win') and isinstance(e, WindowsError) and e.winerror == 5): - raise - atexit.register(close_multiprocessing_pool) - - return multiprocessing_pool - - # .. but for Popen, we cannot have doublequotes, so provide functionality to # remove them when needed. def remove_quotes(arg): @@ -358,46 +279,6 @@ def llvm_nm(file): return llvm_nm_multiple([file])[0] -def read_link_inputs(files): - with ToolchainProfiler.profile_block('read_link_inputs'): - # Before performing the link, we need to look at each input file to determine which symbols - # each of them provides. Do this in multiple parallel processes. - archive_names = [] # .a files passed in to the command line to the link - object_names = [] # .o/.bc files passed in to the command line to the link - for f in files: - absolute_path_f = make_paths_absolute(f) - - if absolute_path_f not in ar_contents and is_ar(absolute_path_f): - archive_names.append(absolute_path_f) - elif absolute_path_f not in nm_cache and is_bitcode(absolute_path_f): - object_names.append(absolute_path_f) - - # Archives contain objects, so process all archives first in parallel to obtain the object files in them. 
- pool = get_multiprocessing_pool() - object_names_in_archives = pool.map(extract_archive_contents, archive_names) - - def clean_temporary_archive_contents_directory(directory): - def clean_at_exit(): - try_delete(directory) - if directory: - atexit.register(clean_at_exit) - - for n in range(len(archive_names)): - if object_names_in_archives[n]['returncode'] != 0: - raise Exception('llvm-ar failed on archive ' + archive_names[n] + '!') - ar_contents[archive_names[n]] = object_names_in_archives[n]['files'] - clean_temporary_archive_contents_directory(object_names_in_archives[n]['dir']) - - for o in object_names_in_archives: - for f in o['files']: - if f not in nm_cache: - object_names.append(f) - - # Next, extract symbols from all object files (either standalone or inside archives we just extracted) - # The results are not used here directly, but populated to llvm-nm cache structure. - llvm_nm_multiple(object_names) - - def llvm_backend_args(): # disable slow and relatively unimportant optimization passes args = ['-combiner-global-alias-analysis=false'] @@ -425,11 +306,7 @@ def llvm_backend_args(): def link_to_object(linker_inputs, target): - # link using lld unless LTO is requested (lld can't output LTO/bitcode object files). - if not Settings.LTO: - link_lld(linker_inputs + ['--relocatable'], target) - else: - link_bitcode(linker_inputs, target) + link_lld(linker_inputs + ['--relocatable'], target) def link_llvm(linker_inputs, target): @@ -557,144 +434,6 @@ def link_lld(args, target, external_symbol_list=None): check_call(cmd) -def link_bitcode(files, target, force_archive_contents=False): - # "Full-featured" linking: looks into archives (duplicates lld functionality) - actual_files = [] - # Tracking unresolveds is necessary for .a linking, see below. - # Specify all possible entry points to seed the linking process. - # For a simple application, this would just be "main". - unresolved_symbols = set([func[1:] for func in Settings.EXPORTED_FUNCTIONS]) - resolved_symbols = set() - # Paths of already included object files from archives. - added_contents = set() - has_ar = False - for f in files: - if not f.startswith('-'): - has_ar = has_ar or is_ar(make_paths_absolute(f)) - - # If we have only one archive or the force_archive_contents flag is set, - # then we will add every object file we see, regardless of whether it - # resolves any undefined symbols. - force_add_all = len(files) == 1 or force_archive_contents - - # Considers an object file for inclusion in the link. The object is included - # if force_add=True or if the object provides a currently undefined symbol. - # If the object is included, the symbol tables are updated and the function - # returns True. - def consider_object(f, force_add=False): - new_symbols = llvm_nm(f) - # Check if the object was valid according to llvm-nm. It also accepts - # native object files. - if not new_symbols.is_valid_for_nm(): - diagnostics.warning('emcc', 'object %s is not valid according to llvm-nm, cannot link', f) - return False - # Check the object is valid for us, and not a native object file. 
- if not is_bitcode(f): - exit_with_error('unknown file type: %s', f) - provided = new_symbols.defs.union(new_symbols.commons) - do_add = force_add or not unresolved_symbols.isdisjoint(provided) - if do_add: - logger.debug('adding object %s to link (forced: %d)' % (f, force_add)) - # Update resolved_symbols table with newly resolved symbols - resolved_symbols.update(provided) - # Update unresolved_symbols table by adding newly unresolved symbols and - # removing newly resolved symbols. - unresolved_symbols.update(new_symbols.undefs.difference(resolved_symbols)) - unresolved_symbols.difference_update(provided) - actual_files.append(f) - return do_add - - # Traverse a single archive. The object files are repeatedly scanned for - # newly satisfied symbols until no new symbols are found. Returns true if - # any object files were added to the link. - def consider_archive(f, force_add): - added_any_objects = False - loop_again = True - logger.debug('considering archive %s' % (f)) - contents = ar_contents[f] - while loop_again: # repeatedly traverse until we have everything we need - loop_again = False - for content in contents: - if content in added_contents: - continue - # Link in the .o if it provides symbols, *or* this is a singleton archive (which is - # apparently an exception in gcc ld) - if consider_object(content, force_add=force_add): - added_contents.add(content) - loop_again = True - added_any_objects = True - logger.debug('done running loop of archive %s' % (f)) - return added_any_objects - - read_link_inputs([x for x in files if not x.startswith('-')]) - - # Rescan a group of archives until we don't find any more objects to link. - def scan_archive_group(group): - loop_again = True - logger.debug('starting archive group loop') - while loop_again: - loop_again = False - for archive in group: - if consider_archive(archive, force_add=False): - loop_again = True - logger.debug('done with archive group loop') - - current_archive_group = None - in_whole_archive = False - for f in files: - absolute_path_f = make_paths_absolute(f) - if f.startswith('-'): - if f in ['--start-group', '-(']: - assert current_archive_group is None, 'Nested --start-group, missing --end-group?' - current_archive_group = [] - elif f in ['--end-group', '-)']: - assert current_archive_group is not None, '--end-group without --start-group' - scan_archive_group(current_archive_group) - current_archive_group = None - elif f in ['--whole-archive', '-whole-archive']: - in_whole_archive = True - elif f in ['--no-whole-archive', '-no-whole-archive']: - in_whole_archive = False - else: - # Command line flags should already be vetted by the time this method - # is called, so this is an internal error - assert False, 'unsupported link flag: ' + f - elif is_ar(absolute_path_f): - # Extract object files from ar archives, and link according to gnu ld semantics - # (link in an entire .o from the archive if it supplies symbols still unresolved) - consider_archive(absolute_path_f, in_whole_archive or force_add_all) - # If we're inside a --start-group/--end-group section, add to the list - # so we can loop back around later. - if current_archive_group is not None: - current_archive_group.append(absolute_path_f) - elif is_bitcode(absolute_path_f): - if has_ar: - consider_object(f, force_add=True) - else: - # If there are no archives then we can simply link all valid object - # files and skip the symbol table stuff. 
- actual_files.append(f) - else: - exit_with_error('unknown file type: %s', f) - - # We have to consider the possibility that --start-group was used without a matching - # --end-group; GNU ld permits this behavior and implicitly treats the end of the - # command line as having an --end-group. - if current_archive_group: - logger.debug('--start-group without matching --end-group, rescanning') - scan_archive_group(current_archive_group) - current_archive_group = None - - try_delete(target) - - # Finish link - # tolerate people trying to link a.so a.so etc. - actual_files = unique_ordered(actual_files) - - logger.debug('emcc: linking: %s to %s', actual_files, target) - link_llvm(actual_files, target) - - def get_command_with_possible_response_file(cmd): # 8k is a bit of an arbitrary limit, but a reasonable one # for max command line size before we use a response file diff --git a/tools/js_optimizer.py b/tools/js_optimizer.py index f932bb01224e7..e324277e936e6 100755 --- a/tools/js_optimizer.py +++ b/tools/js_optimizer.py @@ -295,7 +295,7 @@ def check_symbol_mapping(p): with ToolchainProfiler.profile_block('js_optimizer.split_to_chunks'): # if we are making source maps, we want our debug numbering to start from the # top of the file, so avoid breaking the JS into chunks - cores = building.get_num_cores() + cores = shared.get_num_cores() if not just_split: intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE)) @@ -330,22 +330,7 @@ def write_chunk(chunk, i): with ToolchainProfiler.profile_block('run_optimizer'): if len(filenames): commands = [config.NODE_JS + [ACORN_OPTIMIZER, f] + passes for f in filenames] - - cores = min(cores, len(filenames)) - if len(chunks) > 1 and cores >= 2: - # We can parallelize - if DEBUG: - print('splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % (len(chunks), cores, total_size / (1024 * 1024.)), file=sys.stderr) - with ToolchainProfiler.profile_block('optimizer_pool'): - pool = building.get_multiprocessing_pool() - filenames = pool.map(run_on_chunk, commands, chunksize=1) - else: - # We can't parallize, but still break into chunks to avoid node memory issues - if len(chunks) > 1 and DEBUG: - print('splitting up js optimization into %d chunks' % (len(chunks)), file=sys.stderr) - filenames = [run_on_chunk(command) for command in commands] - else: - filenames = [] + filenames = shared.run_multiple_processes(commands, route_stdout_to_temp_files_suffix='js_opt.jo.js') for filename in filenames: temp_files.note(filename) diff --git a/tools/shared.py b/tools/shared.py index f32031d9caaa4..fa9e7eed98414 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -32,6 +32,14 @@ from . 
import filelock +import signal + +def signal_handler(sig, frame): + sys.exit(1) + +signal.signal(signal.SIGINT, signal_handler) + + DEBUG = int(os.environ.get('EMCC_DEBUG', '0')) DEBUG_SAVE = DEBUG or int(os.environ.get('EMCC_DEBUG_SAVE', '0')) EXPECTED_NODE_VERSION = (4, 1, 1) @@ -96,6 +104,41 @@ def run_process(cmd, check=True, input=None, *args, **kw): return ret +def get_num_cores(): + import multiprocessing + return int(os.environ.get('EMCC_CORES', multiprocessing.cpu_count())) + + +def run_multiple_processes(commands, child_env=None, route_stdout_to_temp_files_suffix=None, pipe_stdout=False): + std_outs = [] + with ToolchainProfiler.profile_block('parallel_run_js_optimizers'): + processes = [] + start = 0 + end = 0 + num_parallel_processes = get_num_cores() + temp_files = configuration.get_temp_files() + while start < len(commands): + if start + num_parallel_processes > end and end < len(commands): # Spawn a new process? + std_out = temp_files.get(route_stdout_to_temp_files_suffix) if route_stdout_to_temp_files_suffix else (subprocess.PIPE if pipe_stdout else None) + if DEBUG: + logger.debug('Running subprocess %d/%d: %s' % (end + 1, len(commands), ' '.join(commands[end]))) + processes += [subprocess.Popen(commands[end], stdout=std_out, env=child_env if child_env else os.environ.copy())] + if route_stdout_to_temp_files_suffix: + std_outs += [std_out.name] + elif pipe_stdout: + std_outs += [std_out] + end += 1 + else: + # Too many commands running in parallel, wait for one to finish. + out, err = processes[start].communicate() + if processes[start].returncode != 0: + if out: logger.info(out.decode('UTF-8')) + if err: logger.error(err.decode('UTF-8')) + raise Exception('Subprocess %d/%d failed with return code %d!' % (start + 1, len(commands), processes[start].returncode)) + start += 1 + return std_outs + + def check_call(cmd, *args, **kw): """Like `run_process` above but treat failures as fatal and exit_with_error.""" print_compiler_stage(cmd) diff --git a/tools/system_libs.py b/tools/system_libs.py index bcb585bc7061e..74a088e9fb079 100644 --- a/tools/system_libs.py +++ b/tools/system_libs.py @@ -13,6 +13,7 @@ import sys from glob import iglob +from .toolchain_profiler import ToolchainProfiler from . import shared, building, ports, config, utils from . import deps_info, tempfiles from . import diagnostics @@ -78,35 +79,39 @@ def clean_env(): return safe_env -def run_one_command(cmd): - # Helper function used by run_build_commands. - if shared.EM_BUILD_VERBOSE: - print(shared.shlex_join(cmd)) - # TODO(sbc): Remove this one we remove the test_em_config_env_var test - cmd.append('-Wno-deprecated') - try: - shared.run_process(cmd, env=clean_env()) - except subprocess.CalledProcessError as e: - print("'%s' failed (%d)" % (shared.shlex_join(e.cmd), e.returncode)) - raise - - def run_build_commands(commands): # Before running a set of build commands make sure the common sysroot # headers are installed. This prevents each sub-process from attempting # to setup the sysroot itself. 
ensure_sysroot() - cores = min(len(commands), building.get_num_cores()) - if cores <= 1 or shared.DEBUG: - for command in commands: - run_one_command(command) - else: - pool = building.get_multiprocessing_pool() - # https://stackoverflow.com/questions/1408356/keyboard-interrupts-with-pythons-multiprocessing-pool - # https://bugs.python.org/issue8296 - # 999999 seconds (about 11 days) is reasonably huge to not trigger actual timeout - # and is smaller than the maximum timeout value 4294967.0 for Python 3 on Windows (threading.TIMEOUT_MAX) - pool.map_async(run_one_command, commands, chunksize=1).get(999999) + + + + safe_env = clean_env() # We already did a sanity check launching the compiler once, no need to launch the compiler + # again on each child subprocess spawn. + safe_env['EMCC_SKIP_SANITY_CHECK'] = '1' + + # If we got spawned by ccache, then launch subprocesses in ccache as well. + if 'EMCC_CCACHE_' in safe_env: + safe_env['EMCC_CCACHE'] = '1' + + for i in range(len(commands)): + # TODO(sbc): Remove this one we remove the test_em_config_env_var test + commands[i].append('-Wno-deprecated') + + # For subprocess spawns, do not route via the OS batch script launcher, but directly + # spawn the python script. This saves ~2 seconds on libc build. + # However if we are using ccache, we must use the wrappers, since they dispatch + # execution to ccache executable. + if 'EMCC_CCACHE' not in safe_env: + if commands[i][0].endswith('emcc.bat'): + commands[i][0] = commands[i][0].replace('emcc.bat', 'emcc.py') + commands[i] = [sys.executable] + commands[i] + elif commands[i][0].endswith('emcc'): + commands[i][0] = commands[i][0].replace('emcc', 'emcc.py') + commands[i] = [sys.executable] + commands[i] + + shared.run_multiple_processes(commands) def create_lib(libname, inputs): @@ -1962,4 +1967,5 @@ def install_system_headers(stamp): def ensure_sysroot(): - shared.Cache.get('sysroot_install.stamp', install_system_headers, what='system headers') + with ToolchainProfiler.profile_block('ensure_sysroot'): + shared.Cache.get('sysroot_install.stamp', install_system_headers, what='system headers') From a5d8ed7711d7be82d85241e401f9435d7966250e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Feb 2021 10:29:23 +0200 Subject: [PATCH 02/26] Change llvm_nm_multiple() to use run_multiple_processes() --- tools/building.py | 36 +++++++++++++++++++----------------- tools/shared.py | 8 ++++---- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/tools/building.py b/tools/building.py index ffdcd46931f21..87927f8a97e42 100644 --- a/tools/building.py +++ b/tools/building.py @@ -206,17 +206,26 @@ def llvm_nm_multiple(files): with ToolchainProfiler.profile_block('llvm_nm_multiple'): if len(files) == 0: return [] - # Run llvm-nm on files that we haven't cached yet + + # Run llvm-nm only files that we haven't cached yet llvm_nm_files = [f for f in files if f not in nm_cache] # We can issue multiple files in a single llvm-nm calls, but only if those # files are all .o or .bc files. Because of llvm-nm output format, we cannot # llvm-nm multiple .a files in one call, but those must be individually checked. 
- if len(llvm_nm_files) > 1: - llvm_nm_files = [f for f in files if f.endswith('.o') or f.endswith('.bc')] - if len(llvm_nm_files) > 0: - cmd = [LLVM_NM] + llvm_nm_files + o_files = [f for f in llvm_nm_files if os.path.splitext(f)[1].lower() in ['.o', '.obj', '.bc']] + a_files = [f for f in llvm_nm_files if f not in o_files] + + # Issue parallel calls for .a files + if len(a_files) > 0: + results = shared.run_multiple_processes([[LLVM_NM, a] for a in a_files], pipe_stdout=True, check=False) + for i in range(len(results)): + nm_cache[a_files[i]] = parse_symbols(results[i]) + + # Issue a single call for multiple .o files + if len(o_files) > 0: + cmd = [LLVM_NM] + o_files cmd = get_command_with_possible_response_file(cmd) results = run_process(cmd, stdout=PIPE, stderr=PIPE, check=False) @@ -240,11 +249,11 @@ def llvm_nm_multiple(files): # so loop over the report to extract the results # for each individual file. - filename = llvm_nm_files[0] + filename = o_files[0] # When we dispatched more than one file, we must manually parse # the file result delimiters (like shown structured above) - if len(llvm_nm_files) > 1: + if len(o_files) > 1: file_start = 0 i = 0 @@ -261,18 +270,11 @@ def llvm_nm_multiple(files): nm_cache[filename] = parse_symbols(results[file_start:]) else: - # We only dispatched a single file, we can just parse that directly - # to the output. + # We only dispatched a single file, so can parse all of the result directly + # to that file. nm_cache[filename] = parse_symbols(results) - # Any .a files that have multiple .o files will have hard time parsing. Scan those - # sequentially to confirm. TODO: Move this to use run_multiple_processes() - # when available. - for f in files: - if f not in nm_cache: - nm_cache[f] = llvm_nm(f) - - return [nm_cache[f] for f in files] + return [nm_cache[f] if f in nm_cache else ObjectFileInfo(1, '') for f in files] def llvm_nm(file): diff --git a/tools/shared.py b/tools/shared.py index fa9e7eed98414..fe53bb490f5f0 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -109,7 +109,7 @@ def get_num_cores(): return int(os.environ.get('EMCC_CORES', multiprocessing.cpu_count())) -def run_multiple_processes(commands, child_env=None, route_stdout_to_temp_files_suffix=None, pipe_stdout=False): +def run_multiple_processes(commands, child_env=None, route_stdout_to_temp_files_suffix=None, pipe_stdout=False, check=True): std_outs = [] with ToolchainProfiler.profile_block('parallel_run_js_optimizers'): processes = [] @@ -125,13 +125,13 @@ def run_multiple_processes(commands, child_env=None, route_stdout_to_temp_files_ processes += [subprocess.Popen(commands[end], stdout=std_out, env=child_env if child_env else os.environ.copy())] if route_stdout_to_temp_files_suffix: std_outs += [std_out.name] - elif pipe_stdout: - std_outs += [std_out] end += 1 else: # Too many commands running in parallel, wait for one to finish. out, err = processes[start].communicate() - if processes[start].returncode != 0: + if pipe_stdout: + std_outs += out.decode('UTF-8') + if check and processes[start].returncode != 0: if out: logger.info(out.decode('UTF-8')) if err: logger.error(err.decode('UTF-8')) raise Exception('Subprocess %d/%d failed with return code %d!' 
% (start + 1, len(commands), processes[start].returncode)) From b9685b012c99add6f9f2e9eee803c6b562e35acf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Feb 2021 10:34:55 +0200 Subject: [PATCH 03/26] flake --- tools/building.py | 1 - tools/shared.py | 22 ++++++++++++---------- tools/system_libs.py | 1 - 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/building.py b/tools/building.py index 87927f8a97e42..e3a05b0a34478 100644 --- a/tools/building.py +++ b/tools/building.py @@ -3,7 +3,6 @@ # University of Illinois/NCSA Open Source License. Both these licenses can be # found in the LICENSE file. -import atexit import json import logging import os diff --git a/tools/shared.py b/tools/shared.py index fe53bb490f5f0..86377779f9a8b 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -32,14 +32,6 @@ from . import filelock -import signal - -def signal_handler(sig, frame): - sys.exit(1) - -signal.signal(signal.SIGINT, signal_handler) - - DEBUG = int(os.environ.get('EMCC_DEBUG', '0')) DEBUG_SAVE = DEBUG or int(os.environ.get('EMCC_DEBUG_SAVE', '0')) EXPECTED_NODE_VERSION = (4, 1, 1) @@ -111,6 +103,14 @@ def get_num_cores(): def run_multiple_processes(commands, child_env=None, route_stdout_to_temp_files_suffix=None, pipe_stdout=False, check=True): std_outs = [] + + # TODO: Experiment with registering a signal handler here to see if that helps with Ctrl-C locking up the command prompt + # when multiple child processes have been spawned. + #import signal + #def signal_handler(sig, frame): + # sys.exit(1) + #signal.signal(signal.SIGINT, signal_handler) + with ToolchainProfiler.profile_block('parallel_run_js_optimizers'): processes = [] start = 0 @@ -132,8 +132,10 @@ def run_multiple_processes(commands, child_env=None, route_stdout_to_temp_files_ if pipe_stdout: std_outs += out.decode('UTF-8') if check and processes[start].returncode != 0: - if out: logger.info(out.decode('UTF-8')) - if err: logger.error(err.decode('UTF-8')) + if out: + logger.info(out.decode('UTF-8')) + if err: + logger.error(err.decode('UTF-8')) raise Exception('Subprocess %d/%d failed with return code %d!' % (start + 1, len(commands), processes[start].returncode)) start += 1 return std_outs diff --git a/tools/system_libs.py b/tools/system_libs.py index 74a088e9fb079..513560ce3a31a 100644 --- a/tools/system_libs.py +++ b/tools/system_libs.py @@ -9,7 +9,6 @@ import logging import os import shutil -import subprocess import sys from glob import iglob From 806ac77964072cf8d0333832e4a78c85f7f6a548 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Feb 2021 10:37:14 +0200 Subject: [PATCH 04/26] fix stdout pipe --- tools/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/shared.py b/tools/shared.py index 86377779f9a8b..b6485d99f6579 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -130,7 +130,7 @@ def run_multiple_processes(commands, child_env=None, route_stdout_to_temp_files_ # Too many commands running in parallel, wait for one to finish. 
out, err = processes[start].communicate() if pipe_stdout: - std_outs += out.decode('UTF-8') + std_outs += [out.decode('UTF-8')] if check and processes[start].returncode != 0: if out: logger.info(out.decode('UTF-8')) From 2ac197c1f6d1b58259514166941704a782560ea0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Feb 2021 10:40:30 +0200 Subject: [PATCH 05/26] flake --- tools/shared.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/shared.py b/tools/shared.py index b6485d99f6579..ed67e6826a6b5 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -106,10 +106,10 @@ def run_multiple_processes(commands, child_env=None, route_stdout_to_temp_files_ # TODO: Experiment with registering a signal handler here to see if that helps with Ctrl-C locking up the command prompt # when multiple child processes have been spawned. - #import signal - #def signal_handler(sig, frame): - # sys.exit(1) - #signal.signal(signal.SIGINT, signal_handler) + # import signal + # def signal_handler(sig, frame): + # sys.exit(1) + # signal.signal(signal.SIGINT, signal_handler) with ToolchainProfiler.profile_block('parallel_run_js_optimizers'): processes = [] From 76f7fca226b855d97b8a7f84a60a9d578a091730 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Feb 2021 10:43:37 +0200 Subject: [PATCH 06/26] Profile block string run_multiple_processes --- tools/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/shared.py b/tools/shared.py index ed67e6826a6b5..27dde79f21a78 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -111,7 +111,7 @@ def run_multiple_processes(commands, child_env=None, route_stdout_to_temp_files_ # sys.exit(1) # signal.signal(signal.SIGINT, signal_handler) - with ToolchainProfiler.profile_block('parallel_run_js_optimizers'): + with ToolchainProfiler.profile_block('run_multiple_processes'): processes = [] start = 0 end = 0 From ed17d8e1bebd3ce15b01e4ff9b55986e3a069ee6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Feb 2021 10:51:54 +0200 Subject: [PATCH 07/26] Remove unnecessary EMCC_SKIP_SANITY_CHECK set --- tools/system_libs.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/system_libs.py b/tools/system_libs.py index 513560ce3a31a..ed261b906d1b1 100644 --- a/tools/system_libs.py +++ b/tools/system_libs.py @@ -86,9 +86,7 @@ def run_build_commands(commands): - safe_env = clean_env() # We already did a sanity check launching the compiler once, no need to launch the compiler - # again on each child subprocess spawn. - safe_env['EMCC_SKIP_SANITY_CHECK'] = '1' + safe_env = clean_env() # If we got spawned by ccache, then launch subprocesses in ccache as well. if 'EMCC_CCACHE_' in safe_env: From 78a40fcd4eec4c4b24ad88dd8f324317cfca24f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 14 Feb 2021 16:25:20 +0200 Subject: [PATCH 08/26] Remove EMCC_CCACHE related code. --- tools/system_libs.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/tools/system_libs.py b/tools/system_libs.py index ed261b906d1b1..193ab5521acea 100644 --- a/tools/system_libs.py +++ b/tools/system_libs.py @@ -88,25 +88,18 @@ def run_build_commands(commands): safe_env = clean_env() - # If we got spawned by ccache, then launch subprocesses in ccache as well. 
- if 'EMCC_CCACHE_' in safe_env: - safe_env['EMCC_CCACHE'] = '1' - for i in range(len(commands)): # TODO(sbc): Remove this one we remove the test_em_config_env_var test commands[i].append('-Wno-deprecated') # For subprocess spawns, do not route via the OS batch script launcher, but directly # spawn the python script. This saves ~2 seconds on libc build. - # However if we are using ccache, we must use the wrappers, since they dispatch - # execution to ccache executable. - if 'EMCC_CCACHE' not in safe_env: - if commands[i][0].endswith('emcc.bat'): - commands[i][0] = commands[i][0].replace('emcc.bat', 'emcc.py') - commands[i] = [sys.executable] + commands[i] - elif commands[i][0].endswith('emcc'): - commands[i][0] = commands[i][0].replace('emcc', 'emcc.py') - commands[i] = [sys.executable] + commands[i] + if commands[i][0].endswith('emcc.bat'): + commands[i][0] = commands[i][0].replace('emcc.bat', 'emcc.py') + commands[i] = [sys.executable] + commands[i] + elif commands[i][0].endswith('emcc'): + commands[i][0] = commands[i][0].replace('emcc', 'emcc.py') + commands[i] = [sys.executable] + commands[i] shared.run_multiple_processes(commands) From aa1588c9afd7e7d09f8c35ba2fac2d4357b8c2bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 28 Mar 2021 12:08:12 +0300 Subject: [PATCH 09/26] Restore bitcode linking. --- tools/building.py | 208 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 207 insertions(+), 1 deletion(-) diff --git a/tools/building.py b/tools/building.py index e3a05b0a34478..f8e970de618c8 100644 --- a/tools/building.py +++ b/tools/building.py @@ -140,6 +140,30 @@ def clear(): _is_ar_cache.clear() +def get_multiprocessing_pool(): + class FakeMultiprocessor(object): + def map(self, func, tasks, *args, **kwargs): + results = [] + for t in tasks: + results += [func(t)] + return results + + def map_async(self, func, tasks, *args, **kwargs): + class Result: + def __init__(self, func, tasks): + self.func = func + self.tasks = tasks + + def get(self, timeout): + results = [] + for t in tasks: + results += [func(t)] + return results + + return Result(func, tasks) + + return FakeMultiprocessor() + # .. but for Popen, we cannot have doublequotes, so provide functionality to # remove them when needed. def remove_quotes(arg): @@ -280,6 +304,46 @@ def llvm_nm(file): return llvm_nm_multiple([file])[0] +def read_link_inputs(files): + with ToolchainProfiler.profile_block('read_link_inputs'): + # Before performing the link, we need to look at each input file to determine which symbols + # each of them provides. Do this in multiple parallel processes. + archive_names = [] # .a files passed in to the command line to the link + object_names = [] # .o/.bc files passed in to the command line to the link + for f in files: + absolute_path_f = make_paths_absolute(f) + + if absolute_path_f not in ar_contents and is_ar(absolute_path_f): + archive_names.append(absolute_path_f) + elif absolute_path_f not in nm_cache and is_bitcode(absolute_path_f): + object_names.append(absolute_path_f) + + # Archives contain objects, so process all archives first in parallel to obtain the object files in them. 
+ pool = get_multiprocessing_pool() + object_names_in_archives = pool.map(extract_archive_contents, archive_names) + + def clean_temporary_archive_contents_directory(directory): + def clean_at_exit(): + try_delete(directory) + if directory: + atexit.register(clean_at_exit) + + for n in range(len(archive_names)): + if object_names_in_archives[n]['returncode'] != 0: + raise Exception('llvm-ar failed on archive ' + archive_names[n] + '!') + ar_contents[archive_names[n]] = object_names_in_archives[n]['files'] + clean_temporary_archive_contents_directory(object_names_in_archives[n]['dir']) + + for o in object_names_in_archives: + for f in o['files']: + if f not in nm_cache: + object_names.append(f) + + # Next, extract symbols from all object files (either standalone or inside archives we just extracted) + # The results are not used here directly, but populated to llvm-nm cache structure. + llvm_nm_multiple(object_names) + + def llvm_backend_args(): # disable slow and relatively unimportant optimization passes args = ['-combiner-global-alias-analysis=false'] @@ -307,7 +371,11 @@ def llvm_backend_args(): def link_to_object(linker_inputs, target): - link_lld(linker_inputs + ['--relocatable'], target) + # link using lld unless LTO is requested (lld can't output LTO/bitcode object files). + if not Settings.LTO: + link_lld(linker_inputs + ['--relocatable'], target) + else: + link_bitcode(linker_inputs, target) def link_llvm(linker_inputs, target): @@ -435,6 +503,144 @@ def link_lld(args, target, external_symbol_list=None): check_call(cmd) +def link_bitcode(files, target, force_archive_contents=False): + # "Full-featured" linking: looks into archives (duplicates lld functionality) + actual_files = [] + # Tracking unresolveds is necessary for .a linking, see below. + # Specify all possible entry points to seed the linking process. + # For a simple application, this would just be "main". + unresolved_symbols = set([func[1:] for func in Settings.EXPORTED_FUNCTIONS]) + resolved_symbols = set() + # Paths of already included object files from archives. + added_contents = set() + has_ar = False + for f in files: + if not f.startswith('-'): + has_ar = has_ar or is_ar(make_paths_absolute(f)) + + # If we have only one archive or the force_archive_contents flag is set, + # then we will add every object file we see, regardless of whether it + # resolves any undefined symbols. + force_add_all = len(files) == 1 or force_archive_contents + + # Considers an object file for inclusion in the link. The object is included + # if force_add=True or if the object provides a currently undefined symbol. + # If the object is included, the symbol tables are updated and the function + # returns True. + def consider_object(f, force_add=False): + new_symbols = llvm_nm(f) + # Check if the object was valid according to llvm-nm. It also accepts + # native object files. + if not new_symbols.is_valid_for_nm(): + diagnostics.warning('emcc', 'object %s is not valid according to llvm-nm, cannot link', f) + return False + # Check the object is valid for us, and not a native object file. 
+ if not is_bitcode(f): + exit_with_error('unknown file type: %s', f) + provided = new_symbols.defs.union(new_symbols.commons) + do_add = force_add or not unresolved_symbols.isdisjoint(provided) + if do_add: + logger.debug('adding object %s to link (forced: %d)' % (f, force_add)) + # Update resolved_symbols table with newly resolved symbols + resolved_symbols.update(provided) + # Update unresolved_symbols table by adding newly unresolved symbols and + # removing newly resolved symbols. + unresolved_symbols.update(new_symbols.undefs.difference(resolved_symbols)) + unresolved_symbols.difference_update(provided) + actual_files.append(f) + return do_add + + # Traverse a single archive. The object files are repeatedly scanned for + # newly satisfied symbols until no new symbols are found. Returns true if + # any object files were added to the link. + def consider_archive(f, force_add): + added_any_objects = False + loop_again = True + logger.debug('considering archive %s' % (f)) + contents = ar_contents[f] + while loop_again: # repeatedly traverse until we have everything we need + loop_again = False + for content in contents: + if content in added_contents: + continue + # Link in the .o if it provides symbols, *or* this is a singleton archive (which is + # apparently an exception in gcc ld) + if consider_object(content, force_add=force_add): + added_contents.add(content) + loop_again = True + added_any_objects = True + logger.debug('done running loop of archive %s' % (f)) + return added_any_objects + + read_link_inputs([x for x in files if not x.startswith('-')]) + + # Rescan a group of archives until we don't find any more objects to link. + def scan_archive_group(group): + loop_again = True + logger.debug('starting archive group loop') + while loop_again: + loop_again = False + for archive in group: + if consider_archive(archive, force_add=False): + loop_again = True + logger.debug('done with archive group loop') + + current_archive_group = None + in_whole_archive = False + for f in files: + absolute_path_f = make_paths_absolute(f) + if f.startswith('-'): + if f in ['--start-group', '-(']: + assert current_archive_group is None, 'Nested --start-group, missing --end-group?' + current_archive_group = [] + elif f in ['--end-group', '-)']: + assert current_archive_group is not None, '--end-group without --start-group' + scan_archive_group(current_archive_group) + current_archive_group = None + elif f in ['--whole-archive', '-whole-archive']: + in_whole_archive = True + elif f in ['--no-whole-archive', '-no-whole-archive']: + in_whole_archive = False + else: + # Command line flags should already be vetted by the time this method + # is called, so this is an internal error + assert False, 'unsupported link flag: ' + f + elif is_ar(absolute_path_f): + # Extract object files from ar archives, and link according to gnu ld semantics + # (link in an entire .o from the archive if it supplies symbols still unresolved) + consider_archive(absolute_path_f, in_whole_archive or force_add_all) + # If we're inside a --start-group/--end-group section, add to the list + # so we can loop back around later. + if current_archive_group is not None: + current_archive_group.append(absolute_path_f) + elif is_bitcode(absolute_path_f): + if has_ar: + consider_object(f, force_add=True) + else: + # If there are no archives then we can simply link all valid object + # files and skip the symbol table stuff. 
+ actual_files.append(f) + else: + exit_with_error('unknown file type: %s', f) + + # We have to consider the possibility that --start-group was used without a matching + # --end-group; GNU ld permits this behavior and implicitly treats the end of the + # command line as having an --end-group. + if current_archive_group: + logger.debug('--start-group without matching --end-group, rescanning') + scan_archive_group(current_archive_group) + current_archive_group = None + + try_delete(target) + + # Finish link + # tolerate people trying to link a.so a.so etc. + actual_files = unique_ordered(actual_files) + + logger.debug('emcc: linking: %s to %s', actual_files, target) + link_llvm(actual_files, target) + + def get_command_with_possible_response_file(cmd): # 8k is a bit of an arbitrary limit, but a reasonable one # for max command line size before we use a response file From 13b8912ce6430fa21975abc2e425f23dc44258b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 28 Mar 2021 12:14:06 +0300 Subject: [PATCH 10/26] cleanup --- tools/building.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/building.py b/tools/building.py index f8e970de618c8..78a366a75fdb2 100644 --- a/tools/building.py +++ b/tools/building.py @@ -229,7 +229,6 @@ def llvm_nm_multiple(files): with ToolchainProfiler.profile_block('llvm_nm_multiple'): if len(files) == 0: return [] - # Run llvm-nm only files that we haven't cached yet llvm_nm_files = [f for f in files if f not in nm_cache] @@ -246,7 +245,7 @@ def llvm_nm_multiple(files): for i in range(len(results)): nm_cache[a_files[i]] = parse_symbols(results[i]) - # Issue a single call for multiple .o files + # Issue a single batch call for multiple .o files if len(o_files) > 0: cmd = [LLVM_NM] + o_files cmd = get_command_with_possible_response_file(cmd) From 83687edbd5a9004d910a8b66fd3c6f1ff3db9782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 28 Mar 2021 13:57:50 +0300 Subject: [PATCH 11/26] Rewrite bitcode linking to avoid python multiprocessing pool. --- tools/building.py | 127 +++++++++++++++--------------------------- tools/js_optimizer.py | 38 +++---------- tools/shared.py | 10 +++- 3 files changed, 60 insertions(+), 115 deletions(-) diff --git a/tools/building.py b/tools/building.py index 78a366a75fdb2..4106d69bf1f5a 100644 --- a/tools/building.py +++ b/tools/building.py @@ -74,46 +74,45 @@ def warn_if_duplicate_entries(archive_contents, archive_filename): diagnostics.warning('emcc', msg) -# This function creates a temporary directory specified by the 'dir' field in -# the returned dictionary. Caller is responsible for cleaning up those files -# after done. -def extract_archive_contents(archive_file): - lines = run_process([LLVM_AR, 't', archive_file], stdout=PIPE).stdout.splitlines() - # ignore empty lines - contents = [l for l in lines if len(l)] - if len(contents) == 0: - logger.debug('Archive %s appears to be empty (recommendation: link an .so instead of .a)' % archive_file) - return { - 'returncode': 0, - 'dir': None, - 'files': [] - } - - # `ar` files can only contains filenames. 
Just to be sure, verify that each - # file has only as filename component and is not absolute - for f in contents: - assert not os.path.dirname(f) - assert not os.path.isabs(f) - - warn_if_duplicate_entries(contents, archive_file) - - # create temp dir - temp_dir = tempfile.mkdtemp('_archive_contents', 'emscripten_temp_') - - # extract file in temp dir - proc = run_process([LLVM_AR, 'xo', archive_file], stdout=PIPE, stderr=STDOUT, cwd=temp_dir) - abs_contents = [os.path.join(temp_dir, c) for c in contents] +# Extracts the given list of archive files and outputs their contents +def extract_archive_contents(archive_files): + archive_results = shared.run_multiple_processes([[LLVM_AR, 't', a] for a in archive_files], pipe_stdout=True) + + unpack_temp_dir = tempfile.mkdtemp('_archive_contents', 'emscripten_temp_') + def clean_at_exit(): + try_delete(unpack_temp_dir) + shared.atexit.register(clean_at_exit) + + archive_contents = [] + + for i in range(len(archive_results)): + a = archive_results[i] + contents = [l for l in a.splitlines() if len(l)] + if len(contents) == 0: + logger.debug('Archive %s appears to be empty (recommendation: link an .so instead of .a)' % a) + + # `ar` files can only contains filenames. Just to be sure, verify that each + # file has only as filename component and is not absolute + for f in contents: + assert not os.path.dirname(f) + assert not os.path.isabs(f) + + warn_if_duplicate_entries(contents, a) + + archive_contents += [{ + 'archive_name': archive_files[i], + 'o_files': [os.path.join(unpack_temp_dir, c) for c in contents] + }] + + shared.run_multiple_processes([[LLVM_AR, 'xo', a] for a in archive_files], cwd=unpack_temp_dir) # check that all files were created - missing_contents = [x for x in abs_contents if not os.path.exists(x)] - if missing_contents: - exit_with_error('llvm-ar failed to extract file(s) ' + str(missing_contents) + ' from archive file ' + f + '! Error:' + str(proc.stdout)) - - return { - 'returncode': proc.returncode, - 'dir': temp_dir, - 'files': abs_contents - } + for a in archive_contents: + missing_contents = [x for x in a['o_files'] if not os.path.exists(x)] + if missing_contents: + exit_with_error('llvm-ar failed to extract file(s) ' + str(missing_contents) + ' from archive file ' + f + '! Error:' + str(proc.stdout)) + + return archive_contents def unique_ordered(values): @@ -140,30 +139,6 @@ def clear(): _is_ar_cache.clear() -def get_multiprocessing_pool(): - class FakeMultiprocessor(object): - def map(self, func, tasks, *args, **kwargs): - results = [] - for t in tasks: - results += [func(t)] - return results - - def map_async(self, func, tasks, *args, **kwargs): - class Result: - def __init__(self, func, tasks): - self.func = func - self.tasks = tasks - - def get(self, timeout): - results = [] - for t in tasks: - results += [func(t)] - return results - - return Result(func, tasks) - - return FakeMultiprocessor() - # .. but for Popen, we cannot have doublequotes, so provide functionality to # remove them when needed. 
def remove_quotes(arg): @@ -229,7 +204,7 @@ def llvm_nm_multiple(files): with ToolchainProfiler.profile_block('llvm_nm_multiple'): if len(files) == 0: return [] - # Run llvm-nm only files that we haven't cached yet + # Run llvm-nm on files that we haven't cached yet llvm_nm_files = [f for f in files if f not in nm_cache] # We can issue multiple files in a single llvm-nm calls, but only if those @@ -318,25 +293,13 @@ def read_link_inputs(files): object_names.append(absolute_path_f) # Archives contain objects, so process all archives first in parallel to obtain the object files in them. - pool = get_multiprocessing_pool() - object_names_in_archives = pool.map(extract_archive_contents, archive_names) - - def clean_temporary_archive_contents_directory(directory): - def clean_at_exit(): - try_delete(directory) - if directory: - atexit.register(clean_at_exit) - - for n in range(len(archive_names)): - if object_names_in_archives[n]['returncode'] != 0: - raise Exception('llvm-ar failed on archive ' + archive_names[n] + '!') - ar_contents[archive_names[n]] = object_names_in_archives[n]['files'] - clean_temporary_archive_contents_directory(object_names_in_archives[n]['dir']) - - for o in object_names_in_archives: - for f in o['files']: - if f not in nm_cache: - object_names.append(f) + archive_contents = extract_archive_contents(archive_names) + + for a in archive_contents: + ar_contents[os.path.abspath(a['archive_name'])] = a['o_files'] + for o in a['o_files']: + if o not in nm_cache: + object_names.append(o) # Next, extract symbols from all object files (either standalone or inside archives we just extracted) # The results are not used here directly, but populated to llvm-nm cache structure. diff --git a/tools/js_optimizer.py b/tools/js_optimizer.py index e324277e936e6..349a9f10552d2 100755 --- a/tools/js_optimizer.py +++ b/tools/js_optimizer.py @@ -130,36 +130,6 @@ def serialize(self): end_asm_marker = '// EMSCRIPTEN_END_ASM\n' -def run_on_chunk(command): - try: - if ACORN_OPTIMIZER in command: # XXX hackish - index = command.index(ACORN_OPTIMIZER) - filename = command[index + 1] - else: - filename = command[1] - if os.environ.get('EMCC_SAVE_OPT_TEMP') and os.environ.get('EMCC_SAVE_OPT_TEMP') != '0': - saved = 'save_' + os.path.basename(filename) - while os.path.exists(saved): - saved = 'input' + str(int(saved.replace('input', '').replace('.txt', '')) + 1) + '.txt' - print('running js optimizer command', ' '.join([c if c != filename else saved for c in command]), file=sys.stderr) - shutil.copyfile(filename, os.path.join(shared.get_emscripten_temp_dir(), saved)) - if shared.EM_BUILD_VERBOSE >= 3: - print('run_on_chunk: ' + str(command), file=sys.stderr) - proc = shared.run_process(command, stdout=subprocess.PIPE) - output = proc.stdout - assert proc.returncode == 0, 'Error in optimizer (return code ' + str(proc.returncode) + '): ' + output - assert len(output) and not output.startswith('Assertion failed'), 'Error in optimizer: ' + output - filename = temp_files.get(os.path.basename(filename) + '.jo.js').name - with open(filename, 'w') as f: - f.write(output) - if DEBUG and not shared.WINDOWS: - print('.', file=sys.stderr) # Skip debug progress indicator on Windows, since it doesn't buffer well with multiple threads printing to console. 
- return filename - except KeyboardInterrupt: - # avoid throwing keyboard interrupts from a child process - raise Exception() - - # Given a set of functions of form (ident, text), and a preferred chunk size, # generates a set of chunks for parallel processing and caching. def chunkify(funcs, chunk_size): @@ -330,6 +300,14 @@ def write_chunk(chunk, i): with ToolchainProfiler.profile_block('run_optimizer'): if len(filenames): commands = [config.NODE_JS + [ACORN_OPTIMIZER, f] + passes for f in filenames] + + if os.environ.get('EMCC_SAVE_OPT_TEMP') and os.environ.get('EMCC_SAVE_OPT_TEMP') != '0': + for filename in filenames: + saved = 'save_' + os.path.basename(filename) + while os.path.exists(saved): + saved = 'input' + str(int(saved.replace('input', '').replace('.txt', '')) + 1) + '.txt' + shutil.copyfile(filename, os.path.join(shared.get_emscripten_temp_dir(), saved)) + filenames = shared.run_multiple_processes(commands, route_stdout_to_temp_files_suffix='js_opt.jo.js') for filename in filenames: diff --git a/tools/shared.py b/tools/shared.py index 27dde79f21a78..d93cc4b87b2f6 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -101,7 +101,11 @@ def get_num_cores(): return int(os.environ.get('EMCC_CORES', multiprocessing.cpu_count())) -def run_multiple_processes(commands, child_env=None, route_stdout_to_temp_files_suffix=None, pipe_stdout=False, check=True): +# Runs multiple subprocess commands. +# bool 'check': If True (default), raises an exception if any of the subprocesses failed with a nonzero exit code. +# string 'route_stdout_to_temp_files_suffix': if not None, all stdouts are instead written to files, and an array of filenames is returned. +# bool 'pipe_stdout': If True, an array of stdouts is returned, for each subprocess. +def run_multiple_processes(commands, child_env=None, route_stdout_to_temp_files_suffix=None, pipe_stdout=False, check=True, cwd=None): std_outs = [] # TODO: Experiment with registering a signal handler here to see if that helps with Ctrl-C locking up the command prompt @@ -122,7 +126,7 @@ def run_multiple_processes(commands, child_env=None, route_stdout_to_temp_files_ std_out = temp_files.get(route_stdout_to_temp_files_suffix) if route_stdout_to_temp_files_suffix else (subprocess.PIPE if pipe_stdout else None) if DEBUG: logger.debug('Running subprocess %d/%d: %s' % (end + 1, len(commands), ' '.join(commands[end]))) - processes += [subprocess.Popen(commands[end], stdout=std_out, env=child_env if child_env else os.environ.copy())] + processes += [subprocess.Popen(commands[end], stdout=std_out, env=child_env if child_env else os.environ.copy(), cwd=cwd)] if route_stdout_to_temp_files_suffix: std_outs += [std_out.name] end += 1 @@ -136,7 +140,7 @@ def run_multiple_processes(commands, child_env=None, route_stdout_to_temp_files_ logger.info(out.decode('UTF-8')) if err: logger.error(err.decode('UTF-8')) - raise Exception('Subprocess %d/%d failed with return code %d!' % (start + 1, len(commands), processes[start].returncode)) + raise Exception('Subprocess %d/%d failed with return code %d! 
(cmdline: %s)' % (start + 1, len(commands), processes[start].returncode, shlex_join(commands[start]))) start += 1 return std_outs From 8bcbef91a1f2b952aaaac607cef1d653ce9e6c2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 28 Mar 2021 14:01:55 +0300 Subject: [PATCH 12/26] Flake --- tools/building.py | 5 +++-- tools/shared.py | 4 ++-- tools/system_libs.py | 6 +----- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/tools/building.py b/tools/building.py index 4106d69bf1f5a..62a3c4d731898 100644 --- a/tools/building.py +++ b/tools/building.py @@ -12,7 +12,7 @@ import subprocess import sys import tempfile -from subprocess import STDOUT, PIPE +from subprocess import PIPE from . import diagnostics from . import response_file @@ -79,6 +79,7 @@ def extract_archive_contents(archive_files): archive_results = shared.run_multiple_processes([[LLVM_AR, 't', a] for a in archive_files], pipe_stdout=True) unpack_temp_dir = tempfile.mkdtemp('_archive_contents', 'emscripten_temp_') + def clean_at_exit(): try_delete(unpack_temp_dir) shared.atexit.register(clean_at_exit) @@ -110,7 +111,7 @@ def clean_at_exit(): for a in archive_contents: missing_contents = [x for x in a['o_files'] if not os.path.exists(x)] if missing_contents: - exit_with_error('llvm-ar failed to extract file(s) ' + str(missing_contents) + ' from archive file ' + f + '! Error:' + str(proc.stdout)) + exit_with_error('llvm-ar failed to extract file(s) ' + str(missing_contents) + ' from archive file ' + f + '!') return archive_contents diff --git a/tools/shared.py b/tools/shared.py index d93cc4b87b2f6..17e814732ca28 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -105,7 +105,7 @@ def get_num_cores(): # bool 'check': If True (default), raises an exception if any of the subprocesses failed with a nonzero exit code. # string 'route_stdout_to_temp_files_suffix': if not None, all stdouts are instead written to files, and an array of filenames is returned. # bool 'pipe_stdout': If True, an array of stdouts is returned, for each subprocess. -def run_multiple_processes(commands, child_env=None, route_stdout_to_temp_files_suffix=None, pipe_stdout=False, check=True, cwd=None): +def run_multiple_processes(commands, env=os.environ.copy(), route_stdout_to_temp_files_suffix=None, pipe_stdout=False, check=True, cwd=None): std_outs = [] # TODO: Experiment with registering a signal handler here to see if that helps with Ctrl-C locking up the command prompt @@ -126,7 +126,7 @@ def run_multiple_processes(commands, child_env=None, route_stdout_to_temp_files_ std_out = temp_files.get(route_stdout_to_temp_files_suffix) if route_stdout_to_temp_files_suffix else (subprocess.PIPE if pipe_stdout else None) if DEBUG: logger.debug('Running subprocess %d/%d: %s' % (end + 1, len(commands), ' '.join(commands[end]))) - processes += [subprocess.Popen(commands[end], stdout=std_out, env=child_env if child_env else os.environ.copy(), cwd=cwd)] + processes += [subprocess.Popen(commands[end], stdout=std_out, env=env, cwd=cwd)] if route_stdout_to_temp_files_suffix: std_outs += [std_out.name] end += 1 diff --git a/tools/system_libs.py b/tools/system_libs.py index 193ab5521acea..d7b2cc29fb97b 100644 --- a/tools/system_libs.py +++ b/tools/system_libs.py @@ -84,10 +84,6 @@ def run_build_commands(commands): # to setup the sysroot itself. 
ensure_sysroot() - - - safe_env = clean_env() - for i in range(len(commands)): # TODO(sbc): Remove this one we remove the test_em_config_env_var test commands[i].append('-Wno-deprecated') @@ -101,7 +97,7 @@ def run_build_commands(commands): commands[i][0] = commands[i][0].replace('emcc', 'emcc.py') commands[i] = [sys.executable] + commands[i] - shared.run_multiple_processes(commands) + shared.run_multiple_processes(commands, env=clean_env()) def create_lib(libname, inputs): From 5358095c3c71f974e92c4def1e6897ac18af4164 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 28 Mar 2021 14:15:46 +0300 Subject: [PATCH 13/26] Remove direct .py spawn in system_libs.py which prevents ccache. --- tools/system_libs.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tools/system_libs.py b/tools/system_libs.py index d7b2cc29fb97b..15880b4e2db5e 100644 --- a/tools/system_libs.py +++ b/tools/system_libs.py @@ -88,15 +88,6 @@ def run_build_commands(commands): # TODO(sbc): Remove this one we remove the test_em_config_env_var test commands[i].append('-Wno-deprecated') - # For subprocess spawns, do not route via the OS batch script launcher, but directly - # spawn the python script. This saves ~2 seconds on libc build. - if commands[i][0].endswith('emcc.bat'): - commands[i][0] = commands[i][0].replace('emcc.bat', 'emcc.py') - commands[i] = [sys.executable] + commands[i] - elif commands[i][0].endswith('emcc'): - commands[i][0] = commands[i][0].replace('emcc', 'emcc.py') - commands[i] = [sys.executable] + commands[i] - shared.run_multiple_processes(commands, env=clean_env()) From b43bcdf9524ae55d5f5fea3161dcb0a265a90f91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Sun, 28 Mar 2021 14:20:56 +0300 Subject: [PATCH 14/26] Use os.cpu_count() instead of importing multiprocessing --- tools/shared.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/shared.py b/tools/shared.py index 17e814732ca28..2f220ce504ca4 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -97,8 +97,7 @@ def run_process(cmd, check=True, input=None, *args, **kw): def get_num_cores(): - import multiprocessing - return int(os.environ.get('EMCC_CORES', multiprocessing.cpu_count())) + return int(os.environ.get('EMCC_CORES', os.cpu_count())) # Runs multiple subprocess commands. From df359385a9dc1bb486795a9fe903f21ec58c1170 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Tue, 30 Mar 2021 00:01:30 +0300 Subject: [PATCH 15/26] Rewrite run_multiple_processes() to avoid start and end iterators. --- tools/shared.py | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/tools/shared.py b/tools/shared.py index 2f220ce504ca4..b2016d07da1cc 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -116,32 +116,48 @@ def run_multiple_processes(commands, env=os.environ.copy(), route_stdout_to_temp with ToolchainProfiler.profile_block('run_multiple_processes'): processes = [] - start = 0 - end = 0 num_parallel_processes = get_num_cores() temp_files = configuration.get_temp_files() - while start < len(commands): - if start + num_parallel_processes > end and end < len(commands): # Spawn a new process? + i = 0 + num_completed = 0 + while num_completed < len(commands): + if i < len(commands) and len(processes) < num_parallel_processes: + # Not enough parallel processes running, spawn a new one. 
std_out = temp_files.get(route_stdout_to_temp_files_suffix) if route_stdout_to_temp_files_suffix else (subprocess.PIPE if pipe_stdout else None) if DEBUG: - logger.debug('Running subprocess %d/%d: %s' % (end + 1, len(commands), ' '.join(commands[end]))) - processes += [subprocess.Popen(commands[end], stdout=std_out, env=env, cwd=cwd)] + logger.debug('Running subprocess %d/%d: %s' % (i + 1, len(commands), ' '.join(commands[i]))) + processes += [(i, subprocess.Popen(commands[i], stdout=std_out, env=env, cwd=cwd))] if route_stdout_to_temp_files_suffix: - std_outs += [std_out.name] - end += 1 + std_outs += [(i, std_out.name)] + i += 1 else: - # Too many commands running in parallel, wait for one to finish. - out, err = processes[start].communicate() + # Not spawning a new process (Too many commands running in parallel, or no commands left): find if a process has finished. + def get_finished_process(): + j = 0 + while j < len(processes): + if processes[j][1].poll() is not None: + return j + j += 1 + # All processes still running; take first (oldest) process to finish. + return 0 + + j = get_finished_process() + idx, finished_process = processes[j] + del processes[j] + out, err = finished_process.communicate() if pipe_stdout: - std_outs += [out.decode('UTF-8')] - if check and processes[start].returncode != 0: + std_outs += [(idx, out.decode('UTF-8'))] + if check and finished_process.returncode != 0: if out: logger.info(out.decode('UTF-8')) if err: logger.error(err.decode('UTF-8')) - raise Exception('Subprocess %d/%d failed with return code %d! (cmdline: %s)' % (start + 1, len(commands), processes[start].returncode, shlex_join(commands[start]))) - start += 1 - return std_outs + raise Exception('Subprocess %d/%d failed with return code %d! (cmdline: %s)' % (idx + 1, len(commands), finished_process.returncode, shlex_join(commands[idx]))) + num_completed += 1 + + # If processes finished out of order, sort the results to the order of the input. + std_outs.sort(key=lambda x: x[0]) + return [x[1] for x in std_outs] def check_call(cmd, *args, **kw): From c0b7a24849d06d47b3d362b03ec247fa6dd79636 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 31 Mar 2021 11:46:03 +0300 Subject: [PATCH 16/26] Use communicate() with timeout --- tools/shared.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/tools/shared.py b/tools/shared.py index b2016d07da1cc..b3ce4d22b28d1 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -133,18 +133,24 @@ def run_multiple_processes(commands, env=os.environ.copy(), route_stdout_to_temp else: # Not spawning a new process (Too many commands running in parallel, or no commands left): find if a process has finished. def get_finished_process(): - j = 0 - while j < len(processes): - if processes[j][1].poll() is not None: - return j - j += 1 - # All processes still running; take first (oldest) process to finish. - return 0 - - j = get_finished_process() + while True: + j = 0 + while j < len(processes): + if processes[j][1].poll() is not None: + out, err = processes[j][1].communicate() + return (j, '', '') + j += 1 + # All processes still running; wait a short while for the first (oldest) process to finish, + # then look again if any process has completed. 
+ try: + out, err = processes[0][1].communicate(0.2) + return (0, out, err) + except TimeoutExpired: + pass + + j, out, err = get_finished_process() idx, finished_process = processes[j] del processes[j] - out, err = finished_process.communicate() if pipe_stdout: std_outs += [(idx, out.decode('UTF-8'))] if check and finished_process.returncode != 0: From 7d4c63efb76dadd5cf462b6ffe0fa420525c3720 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 31 Mar 2021 13:17:50 +0300 Subject: [PATCH 17/26] Fix TimeoutExpired --- tools/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/shared.py b/tools/shared.py index b3ce4d22b28d1..c4d2cb97738d0 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -145,7 +145,7 @@ def get_finished_process(): try: out, err = processes[0][1].communicate(0.2) return (0, out, err) - except TimeoutExpired: + except subprocess.TimeoutExpired: pass j, out, err = get_finished_process() From edb014541a1e5226a836be46a0d6e3054d18d582 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 31 Mar 2021 17:04:24 +0300 Subject: [PATCH 18/26] Fix Linux UTF-8 decoding --- tools/shared.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/shared.py b/tools/shared.py index c4d2cb97738d0..cad793c20bd4a 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -144,7 +144,7 @@ def get_finished_process(): # then look again if any process has completed. try: out, err = processes[0][1].communicate(0.2) - return (0, out, err) + return (0, out.decode('UTF-8') if out else '', err.decode('UTF-8') if err else '') except subprocess.TimeoutExpired: pass @@ -152,12 +152,12 @@ def get_finished_process(): idx, finished_process = processes[j] del processes[j] if pipe_stdout: - std_outs += [(idx, out.decode('UTF-8'))] + std_outs += [(idx, out)] if check and finished_process.returncode != 0: if out: - logger.info(out.decode('UTF-8')) + logger.info(out) if err: - logger.error(err.decode('UTF-8')) + logger.error(err) raise Exception('Subprocess %d/%d failed with return code %d! (cmdline: %s)' % (idx + 1, len(commands), finished_process.returncode, shlex_join(commands[idx]))) num_completed += 1 From 7c6c2cf51c0c53ed841452168f5ea107166b4923 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 31 Mar 2021 17:12:34 +0300 Subject: [PATCH 19/26] Pipe stderr, add check that both file and string stdout piping is not simultaneously active --- tools/shared.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/shared.py b/tools/shared.py index cad793c20bd4a..d572a2efb90b6 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -107,6 +107,9 @@ def get_num_cores(): def run_multiple_processes(commands, env=os.environ.copy(), route_stdout_to_temp_files_suffix=None, pipe_stdout=False, check=True, cwd=None): std_outs = [] + if route_stdout_to_temp_files_suffix and pipe_stdout: + raise Exception('Cannot simultaneously pipe stdout to file and a string! Choose one or the other.') + # TODO: Experiment with registering a signal handler here to see if that helps with Ctrl-C locking up the command prompt # when multiple child processes have been spawned. 
# import signal @@ -126,7 +129,7 @@ def run_multiple_processes(commands, env=os.environ.copy(), route_stdout_to_temp std_out = temp_files.get(route_stdout_to_temp_files_suffix) if route_stdout_to_temp_files_suffix else (subprocess.PIPE if pipe_stdout else None) if DEBUG: logger.debug('Running subprocess %d/%d: %s' % (i + 1, len(commands), ' '.join(commands[i]))) - processes += [(i, subprocess.Popen(commands[i], stdout=std_out, env=env, cwd=cwd))] + processes += [(i, subprocess.Popen(commands[i], stdout=std_out, stderr=subprocess.PIPE if pipe_stdout else None, env=env, cwd=cwd))] if route_stdout_to_temp_files_suffix: std_outs += [(i, std_out.name)] i += 1 From f15bc9d19f7f22cdcac3de001b7da22748fc9e72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 31 Mar 2021 17:53:16 +0300 Subject: [PATCH 20/26] Improve subprocess spawn utilization --- tools/shared.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/tools/shared.py b/tools/shared.py index d572a2efb90b6..89f2d4b7dd951 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -123,16 +123,21 @@ def run_multiple_processes(commands, env=os.environ.copy(), route_stdout_to_temp temp_files = configuration.get_temp_files() i = 0 num_completed = 0 + + def launch_new_process(): + nonlocal processes, std_outs, i + std_out = temp_files.get(route_stdout_to_temp_files_suffix) if route_stdout_to_temp_files_suffix else (subprocess.PIPE if pipe_stdout else None) + if DEBUG: + logger.debug('Running subprocess %d/%d: %s' % (i + 1, len(commands), ' '.join(commands[i]))) + processes += [(i, subprocess.Popen(commands[i], stdout=std_out, stderr=subprocess.PIPE if pipe_stdout else None, env=env, cwd=cwd))] + if route_stdout_to_temp_files_suffix: + std_outs += [(i, std_out.name)] + i += 1 + while num_completed < len(commands): if i < len(commands) and len(processes) < num_parallel_processes: # Not enough parallel processes running, spawn a new one. - std_out = temp_files.get(route_stdout_to_temp_files_suffix) if route_stdout_to_temp_files_suffix else (subprocess.PIPE if pipe_stdout else None) - if DEBUG: - logger.debug('Running subprocess %d/%d: %s' % (i + 1, len(commands), ' '.join(commands[i]))) - processes += [(i, subprocess.Popen(commands[i], stdout=std_out, stderr=subprocess.PIPE if pipe_stdout else None, env=env, cwd=cwd))] - if route_stdout_to_temp_files_suffix: - std_outs += [(i, std_out.name)] - i += 1 + launch_new_process() else: # Not spawning a new process (Too many commands running in parallel, or no commands left): find if a process has finished. 
def get_finished_process(): @@ -140,6 +145,9 @@ def get_finished_process(): j = 0 while j < len(processes): if processes[j][1].poll() is not None: + # Immediately launch the next process to maximize utilization + if i < len(commands): + launch_new_process() out, err = processes[j][1].communicate() return (j, '', '') j += 1 From 24d925545e3e4e1226111186c181b99f01b71179 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 31 Mar 2021 18:27:16 +0300 Subject: [PATCH 21/26] Test against multiprocessing --- tools/shared.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tools/shared.py b/tools/shared.py index 89f2d4b7dd951..199fdc7c2261b 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -100,11 +100,38 @@ def get_num_cores(): return int(os.environ.get('EMCC_CORES', os.cpu_count())) +multiprocessing_pool = None + +def get_multiprocessing_pool(): + import multiprocessing + global multiprocessing_pool + if multiprocessing_pool: + return multiprocessing_pool + multiprocessing_pool = multiprocessing.Pool(processes=get_num_cores()) + return multiprocessing_pool + + +def mp_run_process(command_tuple): + cmd, env, route_stdout_to_temp_files_suffix, pipe_stdout, check, cwd = command_tuple + std_out = temp_files.get(route_stdout_to_temp_files_suffix) if route_stdout_to_temp_files_suffix else (subprocess.PIPE if pipe_stdout else None) + ret = std_out.name if route_stdout_to_temp_files_suffix else None + proc = subprocess.Popen(cmd, stdout=std_out, stderr=subprocess.PIPE if pipe_stdout else None, env=env, cwd=cwd) + out, err = proc.communicate() + if pipe_stdout: + ret = out.decode('UTF-8') + return ret + + +def run_multiple_processes_multiprocessing(commands, env, route_stdout_to_temp_files_suffix, pipe_stdout, check, cwd): + return get_multiprocessing_pool().map(mp_run_process, [(cmd, env, route_stdout_to_temp_files_suffix, pipe_stdout, check, cwd) for cmd in commands], chunksize=1) + # Runs multiple subprocess commands. # bool 'check': If True (default), raises an exception if any of the subprocesses failed with a nonzero exit code. # string 'route_stdout_to_temp_files_suffix': if not None, all stdouts are instead written to files, and an array of filenames is returned. # bool 'pipe_stdout': If True, an array of stdouts is returned, for each subprocess. def run_multiple_processes(commands, env=os.environ.copy(), route_stdout_to_temp_files_suffix=None, pipe_stdout=False, check=True, cwd=None): + return run_multiple_processes_multiprocessing(commands, env, route_stdout_to_temp_files_suffix, pipe_stdout, check, cwd) + std_outs = [] if route_stdout_to_temp_files_suffix and pipe_stdout: From 0b24090a01766c939695244e8c8eec2482e41e4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 31 Mar 2021 18:27:42 +0300 Subject: [PATCH 22/26] Add missing stdout --- tools/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/shared.py b/tools/shared.py index 199fdc7c2261b..7a1ffbd4e6c88 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -176,7 +176,7 @@ def get_finished_process(): if i < len(commands): launch_new_process() out, err = processes[j][1].communicate() - return (j, '', '') + return (j, out.decode('UTF-8') if out else '', err.decode('UTF-8') if err else '') j += 1 # All processes still running; wait a short while for the first (oldest) process to finish, # then look again if any process has completed. 
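
Taken together, the patches above turn run_multiple_processes() into a small poll-and-wait scheduler. The standalone sketch below condenses that shape for readability; it is illustrative only (run_commands, max_parallel and the other names are placeholders, not code from the tree), and it omits the temp-file stdout routing, DEBUG logging and env/cwd plumbing that tools/shared.py also handles. Details keep shifting in the follow-up cleanups below, but the overall pattern stays the same: spawn up to N processes, poll for anything already finished, otherwise block briefly on the oldest process, and return outputs in submission order.

    import os
    import subprocess

    def run_commands(commands, max_parallel=None, check=True):
        # Run each command in `commands` (a list of argv lists), keeping at most
        # max_parallel subprocesses alive at once; return decoded stdouts in
        # submission order.
        max_parallel = max_parallel or os.cpu_count()
        running = []   # (index, Popen) of in-flight processes
        results = []   # (index, stdout text) of finished processes
        next_cmd = 0
        while len(results) < len(commands):
            if next_cmd < len(commands) and len(running) < max_parallel:
                # Free capacity: spawn the next command.
                proc = subprocess.Popen(commands[next_cmd],
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)
                running.append((next_cmd, proc))
                next_cmd += 1
                continue
            # At capacity (or out of commands): reap a process that has already
            # exited, or block briefly on the oldest one and then re-poll.
            out = err = None
            finished = None
            for j, (_, proc) in enumerate(running):
                if proc.poll() is not None:
                    finished = j
                    break
            if finished is None:
                try:
                    out, err = running[0][1].communicate(timeout=0.2)
                    finished = 0
                except subprocess.TimeoutExpired:
                    continue
            idx, proc = running.pop(finished)
            if out is None:
                out, err = proc.communicate()  # already exited; drain its pipes
            if check and proc.returncode != 0:
                raise RuntimeError('command %d failed with return code %d:\n%s'
                                   % (idx, proc.returncode, (err or b'').decode('UTF-8')))
            results.append((idx, out.decode('UTF-8')))
        # Processes may finish out of order; restore submission order.
        results.sort(key=lambda r: r[0])
        return [r[1] for r in results]

The sort at the end is what lets callers rely on the returned stdouts lining up with the commands they passed in, regardless of completion order.
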
From 9a857ef80006329634afbbf6cdb53ec4860e02f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 31 Mar 2021 18:32:39 +0300 Subject: [PATCH 23/26] Remove launch_new_process() --- tools/shared.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/tools/shared.py b/tools/shared.py index 7a1ffbd4e6c88..e01321ada0455 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -151,20 +151,16 @@ def run_multiple_processes(commands, env=os.environ.copy(), route_stdout_to_temp i = 0 num_completed = 0 - def launch_new_process(): - nonlocal processes, std_outs, i - std_out = temp_files.get(route_stdout_to_temp_files_suffix) if route_stdout_to_temp_files_suffix else (subprocess.PIPE if pipe_stdout else None) - if DEBUG: - logger.debug('Running subprocess %d/%d: %s' % (i + 1, len(commands), ' '.join(commands[i]))) - processes += [(i, subprocess.Popen(commands[i], stdout=std_out, stderr=subprocess.PIPE if pipe_stdout else None, env=env, cwd=cwd))] - if route_stdout_to_temp_files_suffix: - std_outs += [(i, std_out.name)] - i += 1 - while num_completed < len(commands): if i < len(commands) and len(processes) < num_parallel_processes: # Not enough parallel processes running, spawn a new one. - launch_new_process() + std_out = temp_files.get(route_stdout_to_temp_files_suffix) if route_stdout_to_temp_files_suffix else (subprocess.PIPE if pipe_stdout else None) + if DEBUG: + logger.debug('Running subprocess %d/%d: %s' % (i + 1, len(commands), ' '.join(commands[i]))) + processes += [(i, subprocess.Popen(commands[i], stdout=std_out, stderr=subprocess.PIPE if pipe_stdout else None, env=env, cwd=cwd))] + if route_stdout_to_temp_files_suffix: + std_outs += [(i, std_out.name)] + i += 1 else: # Not spawning a new process (Too many commands running in parallel, or no commands left): find if a process has finished. def get_finished_process(): @@ -172,9 +168,6 @@ def get_finished_process(): j = 0 while j < len(processes): if processes[j][1].poll() is not None: - # Immediately launch the next process to maximize utilization - if i < len(commands): - launch_new_process() out, err = processes[j][1].communicate() return (j, out.decode('UTF-8') if out else '', err.decode('UTF-8') if err else '') j += 1 From 2402ef36f674b6ec38f33eea3a5b1456ebc1e76c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 31 Mar 2021 18:38:32 +0300 Subject: [PATCH 24/26] Cleanup code --- tools/shared.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/tools/shared.py b/tools/shared.py index e01321ada0455..b13233e0132c6 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -25,7 +25,7 @@ from .toolchain_profiler import ToolchainProfiler from .tempfiles import try_delete -from .utils import path_from_root, exit_with_error, safe_ensure_dirs, WINDOWS +from .utils import path_from_root, exit_with_error, safe_ensure_dirs, WINDOWS, LINUX from . import cache, tempfiles, colored_logger from . import diagnostics from . 
import config @@ -38,6 +38,9 @@ EXPECTED_LLVM_VERSION = "13.0" PYTHON = sys.executable +# Used only on Linux +multiprocessing_pool = None + # can add %(asctime)s to see timestamps logging.basicConfig(format='%(name)s:%(levelname)s: %(message)s', level=logging.DEBUG if DEBUG else logging.INFO) @@ -100,37 +103,29 @@ def get_num_cores(): return int(os.environ.get('EMCC_CORES', os.cpu_count())) -multiprocessing_pool = None - -def get_multiprocessing_pool(): - import multiprocessing - global multiprocessing_pool - if multiprocessing_pool: - return multiprocessing_pool - multiprocessing_pool = multiprocessing.Pool(processes=get_num_cores()) - return multiprocessing_pool - - def mp_run_process(command_tuple): cmd, env, route_stdout_to_temp_files_suffix, pipe_stdout, check, cwd = command_tuple std_out = temp_files.get(route_stdout_to_temp_files_suffix) if route_stdout_to_temp_files_suffix else (subprocess.PIPE if pipe_stdout else None) ret = std_out.name if route_stdout_to_temp_files_suffix else None proc = subprocess.Popen(cmd, stdout=std_out, stderr=subprocess.PIPE if pipe_stdout else None, env=env, cwd=cwd) - out, err = proc.communicate() + out, _ = proc.communicate() if pipe_stdout: ret = out.decode('UTF-8') return ret -def run_multiple_processes_multiprocessing(commands, env, route_stdout_to_temp_files_suffix, pipe_stdout, check, cwd): - return get_multiprocessing_pool().map(mp_run_process, [(cmd, env, route_stdout_to_temp_files_suffix, pipe_stdout, check, cwd) for cmd in commands], chunksize=1) - # Runs multiple subprocess commands. # bool 'check': If True (default), raises an exception if any of the subprocesses failed with a nonzero exit code. # string 'route_stdout_to_temp_files_suffix': if not None, all stdouts are instead written to files, and an array of filenames is returned. # bool 'pipe_stdout': If True, an array of stdouts is returned, for each subprocess. def run_multiple_processes(commands, env=os.environ.copy(), route_stdout_to_temp_files_suffix=None, pipe_stdout=False, check=True, cwd=None): - return run_multiple_processes_multiprocessing(commands, env, route_stdout_to_temp_files_suffix, pipe_stdout, check, cwd) + # Spawning multiple processes on Linux is slower without multiprocessing pool. On Windows and macOS, not using multiprocessing pool is faster. + if LINUX: + import multiprocessing + global multiprocessing_pool + if not multiprocessing_pool: + multiprocessing_pool = multiprocessing.Pool(processes=get_num_cores()) + return multiprocessing_pool.map(mp_run_process, [(cmd, env, route_stdout_to_temp_files_suffix, pipe_stdout, check, cwd) for cmd in commands], chunksize=1) std_outs = [] From ca61c04317a7432aa2828eec1520ae1e870b41ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 31 Mar 2021 22:58:42 +0300 Subject: [PATCH 25/26] Add EM_PYTHON_MULTIPROCESSING --- ChangeLog.md | 3 +++ tools/shared.py | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 98400f974877a..48882a9c1d8f3 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -20,6 +20,9 @@ See docs/process.md for more on how version tagging works. Current Trunk ------------- +- Removed use of Python multiprocessing library because of stability issues. Added + new environment variable EM_PYTHON_MULTIPROCESSING=1 that can be enabled + to revert back to using Python multiprocessing. (#13493) - Binaryen now always inlines single-use functions. This should reduce code size and improve performance (#13744). 
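
For readers who want the opt-in path in one place before the tools/shared.py diff that follows: setting EM_PYTHON_MULTIPROCESSING=1 routes the work through a lazily created, process-wide multiprocessing.Pool instead of the subprocess scheduler. The sketch below is a condensed reading aid with placeholder names (the tree's actual helpers are mp_run_process and run_multiple_processes), not a drop-in replacement.

    import multiprocessing
    import os
    import subprocess

    _pool = None  # created lazily, then reused for the rest of the process lifetime

    def _run_one(cmd):
        # Must be a module-level function so multiprocessing can pickle it.
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        out, _ = proc.communicate()
        return out.decode('UTF-8')

    def run_commands_with_pool(commands):
        # Returns None when the pool path is disabled; the caller then falls
        # back to the plain-subprocess scheduler.
        global _pool
        if not int(os.getenv('EM_PYTHON_MULTIPROCESSING', '0')):
            return None
        if _pool is None:
            _pool = multiprocessing.Pool(processes=os.cpu_count())
        return _pool.map(_run_one, commands, chunksize=1)

Exporting EM_PYTHON_MULTIPROCESSING=1 in the environment before invoking the tools is all that is needed to take this path.
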
diff --git a/tools/shared.py b/tools/shared.py index b13233e0132c6..e3a5f982ef833 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -104,6 +104,7 @@ def get_num_cores(): def mp_run_process(command_tuple): + temp_files = configuration.get_temp_files() cmd, env, route_stdout_to_temp_files_suffix, pipe_stdout, check, cwd = command_tuple std_out = temp_files.get(route_stdout_to_temp_files_suffix) if route_stdout_to_temp_files_suffix else (subprocess.PIPE if pipe_stdout else None) ret = std_out.name if route_stdout_to_temp_files_suffix else None @@ -119,8 +120,9 @@ def mp_run_process(command_tuple): # string 'route_stdout_to_temp_files_suffix': if not None, all stdouts are instead written to files, and an array of filenames is returned. # bool 'pipe_stdout': If True, an array of stdouts is returned, for each subprocess. def run_multiple_processes(commands, env=os.environ.copy(), route_stdout_to_temp_files_suffix=None, pipe_stdout=False, check=True, cwd=None): - # Spawning multiple processes on Linux is slower without multiprocessing pool. On Windows and macOS, not using multiprocessing pool is faster. - if LINUX: + # By default, avoid using Python multiprocessing library due to a large amount of bugs it has on Windows (#8013, #718, #13785, etc.) + # Use EM_PYTHON_MULTIPROCESSING=1 environment variable to enable it. It can be faster, but may not work on Windows. + if int(os.getenv('EM_PYTHON_MULTIPROCESSING', '0')): import multiprocessing global multiprocessing_pool if not multiprocessing_pool: From 29301864bd976132ec3f7dadaede38610529f75b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Jyl=C3=A4nki?= Date: Wed, 31 Mar 2021 23:00:48 +0300 Subject: [PATCH 26/26] Flake --- tools/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/shared.py b/tools/shared.py index e3a5f982ef833..f867bcd254ff8 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -25,7 +25,7 @@ from .toolchain_profiler import ToolchainProfiler from .tempfiles import try_delete -from .utils import path_from_root, exit_with_error, safe_ensure_dirs, WINDOWS, LINUX +from .utils import path_from_root, exit_with_error, safe_ensure_dirs, WINDOWS from . import cache, tempfiles, colored_logger from . import diagnostics from . import config
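
To close, a caller-side view of run_multiple_processes() as it stands at the end of this series. The commands themselves are made up for illustration, but the keyword arguments and return behaviour follow the helper's documented contract: stdouts come back in submission order, check=True (the default) raises on a nonzero exit code, and route_stdout_to_temp_files_suffix and pipe_stdout cannot both be set.

    import os
    from tools import shared

    # Plain fire-and-forget: run everything, up to get_num_cores() processes at
    # a time, raising if any command fails.
    compile_cmds = [['clang', '-c', src, '-o', src + '.o'] for src in ('a.c', 'b.c')]
    shared.run_multiple_processes(compile_cmds, env=os.environ.copy())

    # pipe_stdout=True: capture each command's stdout and get the decoded
    # strings back, ordered like the input commands.
    listings = shared.run_multiple_processes([['llvm-ar', 't', a] for a in ('x.a', 'y.a')],
                                             pipe_stdout=True)

    # route_stdout_to_temp_files_suffix: write each stdout to a temp file and
    # get the filenames back instead (the mode the JS optimizer pipeline uses;
    # the node command here is a placeholder).
    outputs = shared.run_multiple_processes([['node', 'optimizer.js', f] for f in ('a.js', 'b.js')],
                                            route_stdout_to_temp_files_suffix='jo.js')
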