diff --git a/emar.py b/emar.py index ec455acb4bffb..23428ce67ceb7 100755 --- a/emar.py +++ b/emar.py @@ -18,12 +18,8 @@ import sys -from tools.toolchain_profiler import ToolchainProfiler from tools import shared -if __name__ == '__main__': - ToolchainProfiler.record_process_start() - # # Main run() function diff --git a/emcc.py b/emcc.py index 450e600cce0e6..290b6d4dd7bcb 100755 --- a/emcc.py +++ b/emcc.py @@ -23,6 +23,7 @@ your system headers will be used. """ +from tools.toolchain_profiler import ToolchainProfiler import base64 import json @@ -36,6 +37,8 @@ import time from enum import Enum from subprocess import PIPE +from urllib.parse import quote + import emscripten from tools import shared, system_libs @@ -46,18 +49,12 @@ from tools.response_file import substitute_response_files from tools.minimal_runtime_shell import generate_minimal_runtime_html import tools.line_endings -from tools.toolchain_profiler import ToolchainProfiler from tools import js_manipulation from tools import wasm2c from tools import webassembly from tools import config from tools.settings import settings -if __name__ == '__main__': - ToolchainProfiler.record_process_start() - -from urllib.parse import quote - logger = logging.getLogger('emcc') # endings = dot + a suffix, safe to test by filename.endswith(endings) diff --git a/emscripten.py b/emscripten.py index 676d83d33869e..406da9b2d9ee1 100644 --- a/emscripten.py +++ b/emscripten.py @@ -9,6 +9,8 @@ headers, for the libc implementation in JS). """ +from tools.toolchain_profiler import ToolchainProfiler + import os import json import subprocess @@ -24,7 +26,6 @@ from tools import gen_struct_info from tools import webassembly from tools.shared import WINDOWS, path_from_root, exit_with_error, asmjs_mangle, treat_as_user_function -from tools.toolchain_profiler import ToolchainProfiler from tools.settings import settings logger = logging.getLogger('emscripten') diff --git a/site/source/docs/optimizing/Profiling-Toolchain.rst b/site/source/docs/optimizing/Profiling-Toolchain.rst index 3058e137850ef..562a9bf223733 100644 --- a/site/source/docs/optimizing/Profiling-Toolchain.rst +++ b/site/source/docs/optimizing/Profiling-Toolchain.rst @@ -44,18 +44,6 @@ The output HTML filename can be chosen with the optional ``--outfile=myresults.h Instrumenting Python Scripts ============================ -In order for the toolchain to work, each "top level" Python script (a script that is directly called from the command line, or by a subprocess spawn) should have the following preamble in the beginning of the script: - -.. code-block:: python - - from tools.toolchain_profiler import ToolchainProfiler - if __name__ == '__main__': - ToolchainProfiler.record_process_start() - -Additionally, at the end of the script when the script is about to exit, it should do so by explicitly calling either the ``sys.exit()`` function, or by calling the ``ToolchainProfiler.record_process_exit()`` function, whichever is more convenient for the script. The function ``ToolchainProfiler.record_process_exit()`` does not exit by itself, but only records that the process is quitting. - -These two blocks ensure that the toolchain profiler will be aware of all tool invocations that occur. In the graphed output, the process spawns will be shown in green color. - Python Profiling Blocks ----------------------- diff --git a/tools/building.py b/tools/building.py index f73c5e41ad8d7..19502b5e03569 100644 --- a/tools/building.py +++ b/tools/building.py @@ -3,6 +3,8 @@ # University of Illinois/NCSA Open Source License. Both these licenses can be # found in the LICENSE file. +from .toolchain_profiler import ToolchainProfiler + import json import logging import os @@ -19,7 +21,6 @@ from . import shared from . import webassembly from . import config -from .toolchain_profiler import ToolchainProfiler from .shared import CLANG_CC, CLANG_CXX, PYTHON from .shared import LLVM_NM, EMCC, EMAR, EMXX, EMRANLIB, WASM_LD, LLVM_AR from .shared import LLVM_LINK, LLVM_OBJCOPY diff --git a/tools/file_packager.py b/tools/file_packager.py index a75cfa7124cc3..7715b3e06f8a7 100755 --- a/tools/file_packager.py +++ b/tools/file_packager.py @@ -64,10 +64,6 @@ sys.path.insert(1, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from tools.toolchain_profiler import ToolchainProfiler -if __name__ == '__main__': - ToolchainProfiler.record_process_start() - import posixpath from tools import shared from subprocess import PIPE diff --git a/tools/js_optimizer.py b/tools/js_optimizer.py index f809411310ca7..54e4abc40a0c9 100755 --- a/tools/js_optimizer.py +++ b/tools/js_optimizer.py @@ -16,9 +16,6 @@ from tools.toolchain_profiler import ToolchainProfiler from tools import building, config, shared -if __name__ == '__main__': - ToolchainProfiler.record_process_start() - configuration = shared.configuration temp_files = configuration.get_temp_files() @@ -404,7 +401,6 @@ def run(filename, passes, extra_info=None): def main(): - ToolchainProfiler.record_process_start() last = sys.argv[-1] if '{' in last: extra_info = json.loads(last) diff --git a/tools/shared.py b/tools/shared.py index 0c1135a7aa22e..5566a56ea7fce 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -3,6 +3,8 @@ # University of Illinois/NCSA Open Source License. Both these licenses can be # found in the LICENSE file. +from .toolchain_profiler import ToolchainProfiler + from subprocess import PIPE import atexit import binascii @@ -22,7 +24,6 @@ print('error: emscripten requires python 3.6 or above', file=sys.stderr) sys.exit(1) -from .toolchain_profiler import ToolchainProfiler from .tempfiles import try_delete from .utils import path_from_root, exit_with_error, safe_ensure_dirs, WINDOWS from . import cache, tempfiles, colored_logger diff --git a/tools/system_libs.py b/tools/system_libs.py index b0d4521fe299b..3722d53a351e3 100644 --- a/tools/system_libs.py +++ b/tools/system_libs.py @@ -3,6 +3,8 @@ # University of Illinois/NCSA Open Source License. Both these licenses can be # found in the LICENSE file. +from .toolchain_profiler import ToolchainProfiler + import glob import hashlib import itertools @@ -12,7 +14,6 @@ import sys from glob import iglob -from .toolchain_profiler import ToolchainProfiler from . import shared, building, ports, config, utils from . import deps_info, tempfiles from . import diagnostics diff --git a/tools/toolchain_profiler.py b/tools/toolchain_profiler.py index 187cee4d53bda..233af3da9a5ed 100644 --- a/tools/toolchain_profiler.py +++ b/tools/toolchain_profiler.py @@ -3,6 +3,7 @@ # University of Illinois/NCSA Open Source License. Both these licenses can be # found in the LICENSE file. +import atexit import subprocess import os import time @@ -22,9 +23,12 @@ original_subprocess_check_call = subprocess.check_call original_subprocess_check_output = subprocess.check_output original_Popen = subprocess.Popen + process_returncode = None def profiled_sys_exit(returncode): - ToolchainProfiler.record_process_exit(returncode) + # Stack the returncode which then gets used in the atexit handler + global process_returncode + process_returncode = returncode original_sys_exit(returncode) def profiled_call(cmd, *args, **kw): @@ -78,14 +82,15 @@ def communicate(self, *args, **kwargs): subprocess.Popen = ProfiledPopen class ToolchainProfiler: - # Provide a running counter towards negative numbers for PIDs for which we don't know what the actual process ID is + # Provide a running counter towards negative numbers for PIDs for which we + # don't know what the actual process ID is imaginary_pid_ = 0 profiler_logs_path = None # Log file not opened yet block_stack = [] - # Because process spawns are tracked from multiple entry points, it is possible that record_process_start() and/or record_process_exit() - # are called multiple times. Prevent recording multiple entries to the logs to keep them clean. + # Track if record_process_exit and record_process_start have been called + # so we can assert they are only ever called once. process_start_recorded = False process_exit_recorded = False @@ -95,15 +100,14 @@ def timestamp(): @staticmethod def log_access(): - # If somehow the process escaped opening the log at startup, do so now. (this biases the startup time of the process, but best effort) - if not ToolchainProfiler.profiler_logs_path: - ToolchainProfiler.record_process_start() - - # Note: This function is called in two importantly different contexts: in "main" process and in python subprocesses - # invoked via subprocessing.Pool.map(). The subprocesses have their own PIDs, and hence record their own data JSON - # files, but since the process pool is maintained internally by python, the toolchain profiler does not track the - # parent->child process spawns for the subprocessing pools. Therefore any profiling events that the subprocess - # children generate are virtually treated as if they were performed by the parent PID. + # Note: This function is called in two importantly different contexts: in + # "main" process and in python subprocesses invoked via + # subprocessing.Pool.map(). The subprocesses have their own PIDs, and + # hence record their own data JSON files, but since the process pool is + # maintained internally by python, the toolchain profiler does not track + # the parent->child process spawns for the subprocessing pools. Therefore + # any profiling events that the subprocess children generate are virtually + # treated as if they were performed by the parent PID. return open(os.path.join(ToolchainProfiler.profiler_logs_path, 'toolchain_profiler.pid_' + str(os.getpid()) + '.json'), 'a') @staticmethod @@ -116,18 +120,17 @@ def escape_args(args): @staticmethod def record_process_start(write_log_entry=True): - # For subprocessing.Pool.map() child processes, this points to the PID of the parent process that spawned - # the subprocesses. This makes the subprocesses look as if the parent had called the functions. + assert not ToolchainProfiler.process_start_recorded + ToolchainProfiler.process_start_recorded = True + atexit.register(ToolchainProfiler.record_process_exit) + + # For subprocessing.Pool.map() child processes, this points to the PID of + # the parent process that spawned the subprocesses. This makes the + # subprocesses look as if the parent had called the functions. ToolchainProfiler.mypid_str = str(os.getpid()) ToolchainProfiler.profiler_logs_path = os.path.join(tempfile.gettempdir(), 'emscripten_toolchain_profiler_logs') - try: - os.makedirs(ToolchainProfiler.profiler_logs_path) - except OSError: - pass + os.makedirs(ToolchainProfiler.profiler_logs_path, exist_ok=True) - if ToolchainProfiler.process_start_recorded: - return - ToolchainProfiler.process_start_recorded = True ToolchainProfiler.block_stack = [] if write_log_entry: @@ -135,13 +138,16 @@ def record_process_start(write_log_entry=True): f.write('[\n{"pid":' + ToolchainProfiler.mypid_str + ',"subprocessPid":' + str(os.getpid()) + ',"op":"start","time":' + ToolchainProfiler.timestamp() + ',"cmdLine":["' + '","'.join(ToolchainProfiler.escape_args(sys.argv)) + '"]}') @staticmethod - def record_process_exit(returncode): - if ToolchainProfiler.process_exit_recorded: - return + def record_process_exit(): + assert not ToolchainProfiler.process_exit_recorded + assert ToolchainProfiler.process_start_recorded ToolchainProfiler.process_exit_recorded = True ToolchainProfiler.exit_all_blocks() with ToolchainProfiler.log_access() as f: + returncode = process_returncode + if returncode is None: + returncode = '"MISSING EXIT CODE"' f.write(',\n{"pid":' + ToolchainProfiler.mypid_str + ',"subprocessPid":' + str(os.getpid()) + ',"op":"exit","time":' + ToolchainProfiler.timestamp() + ',"returncode":' + str(returncode) + '}\n]\n') @staticmethod @@ -211,28 +217,9 @@ def imaginary_pid(): ToolchainProfiler.imaginary_pid_ -= 1 return ToolchainProfiler.imaginary_pid_ + ToolchainProfiler.record_process_start() else: class ToolchainProfiler: - @staticmethod - def record_process_start(): - pass - - @staticmethod - def record_process_exit(returncode): - pass - - @staticmethod - def record_subprocess_spawn(process_pid, process_cmdline): - pass - - @staticmethod - def record_subprocess_wait(process_pid): - pass - - @staticmethod - def record_subprocess_finish(process_pid, returncode): - pass - @staticmethod def enter_block(block_name): pass