diff --git a/build.xml b/build.xml index b66b1462c0d..d5491bda4d0 100644 --- a/build.xml +++ b/build.xml @@ -1016,6 +1016,7 @@ + diff --git a/src/com/facebook/buck/features/python/BUCK b/src/com/facebook/buck/features/python/BUCK index 0c2b3ad7c4e..b5ed21b8480 100644 --- a/src/com/facebook/buck/features/python/BUCK +++ b/src/com/facebook/buck/features/python/BUCK @@ -69,6 +69,7 @@ java_library_with_plugins( "__test_main__.py", "compile.py", "run_inplace.py.in", + "run_inplace_interpreter_wrapper.py.in", "run_inplace_lite.py.in", ], tests = [ diff --git a/src/com/facebook/buck/features/python/PythonInPlaceBinary.java b/src/com/facebook/buck/features/python/PythonInPlaceBinary.java index e41caa29131..8f4d17008fc 100644 --- a/src/com/facebook/buck/features/python/PythonInPlaceBinary.java +++ b/src/com/facebook/buck/features/python/PythonInPlaceBinary.java @@ -18,15 +18,18 @@ import com.facebook.buck.core.build.buildable.context.BuildableContext; import com.facebook.buck.core.build.context.BuildContext; +import com.facebook.buck.core.filesystems.AbsPath; import com.facebook.buck.core.filesystems.RelPath; import com.facebook.buck.core.model.BuildTarget; import com.facebook.buck.core.model.OutputLabel; import com.facebook.buck.core.model.TargetConfiguration; +import com.facebook.buck.core.model.impl.BuildTargetPaths; import com.facebook.buck.core.rulekey.AddToRuleKey; import com.facebook.buck.core.rules.BuildRule; import com.facebook.buck.core.rules.BuildRuleResolver; import com.facebook.buck.core.rules.attr.HasRuntimeDeps; import com.facebook.buck.core.rules.impl.SymlinkTree; +import com.facebook.buck.core.sourcepath.ExplicitBuildTargetSourcePath; import com.facebook.buck.core.toolchain.tool.Tool; import com.facebook.buck.core.toolchain.tool.impl.CommandTool; import com.facebook.buck.cxx.toolchain.CxxPlatform; @@ -39,6 +42,7 @@ import com.facebook.buck.step.Step; import com.facebook.buck.step.fs.MkdirStep; import 
com.facebook.buck.step.isolatedsteps.common.WriteFileIsolatedStep; +import com.facebook.buck.test.selectors.Nullable; import com.facebook.buck.util.Escaper; import com.facebook.buck.util.stream.RichStream; import com.google.common.base.Joiner; @@ -57,6 +61,8 @@ public class PythonInPlaceBinary extends PythonBinary implements HasRuntimeDeps { private static final String RUN_INPLACE_RESOURCE = "run_inplace.py.in"; + private static final String RUN_INPLACE_INTERPRETER_WRAPPER_RESOURCE = + "run_inplace_interpreter_wrapper.py.in"; private static final String RUN_INPLACE_LITE_RESOURCE = "run_inplace_lite.py.in"; // TODO(agallagher): Task #8098647: This rule has no steps, so it @@ -68,8 +74,10 @@ public class PythonInPlaceBinary extends PythonBinary implements HasRuntimeDeps // // We should upate the Python test rule to account for this. private final SymlinkTree linkTree; + private final RelPath interpreterWrapperGenPath; @AddToRuleKey private final Tool python; - @AddToRuleKey private final Supplier script; + @AddToRuleKey private final Supplier binScript; + @AddToRuleKey private final Supplier interpreterWrapperScript; PythonInPlaceBinary( BuildTarget buildTarget, @@ -98,18 +106,28 @@ public class PythonInPlaceBinary extends PythonBinary implements HasRuntimeDeps legacyOutputPath); this.linkTree = linkTree; this.python = python; - this.script = - getScript( + this.interpreterWrapperGenPath = + getInterpreterWrapperGenPath( + buildTarget, projectFilesystem, pexExtension, legacyOutputPath); + AbsPath targetRoot = + projectFilesystem + .resolve(getBinPath(buildTarget, projectFilesystem, pexExtension, legacyOutputPath)) + .getParent(); + this.binScript = + getBinScript( + pythonPlatform, + mainModule, + targetRoot.relativize(linkTree.getRoot()), + targetRoot.relativize(projectFilesystem.resolve(interpreterWrapperGenPath)), + packageStyle); + this.interpreterWrapperScript = + getInterpreterWrapperScript( ruleResolver, buildTarget.getTargetConfiguration(), pythonPlatform, 
cxxPlatform, - mainModule, components, - projectFilesystem - .resolve(getBinPath(buildTarget, projectFilesystem, pexExtension, legacyOutputPath)) - .getParent() - .relativize(linkTree.getRoot()), + targetRoot.relativize(linkTree.getRoot()), preloadLibraries, packageStyle); } @@ -123,6 +141,10 @@ private static String getRunInplaceResource() { return getNamedResource(RUN_INPLACE_RESOURCE); } + private static String getRunInplaceInterpreterWrapperResource() { + return getNamedResource(RUN_INPLACE_INTERPRETER_WRAPPER_RESOURCE); + } + private static String getRunInplaceLiteResource() { return getNamedResource(RUN_INPLACE_LITE_RESOURCE); } @@ -136,29 +158,64 @@ private static String getNamedResource(String resourceName) { } } - private static Supplier getScript( + private static RelPath getInterpreterWrapperGenPath( + BuildTarget target, + ProjectFilesystem filesystem, + String extension, + boolean legacyOutputPath) { + if (!legacyOutputPath) { + target = target.withFlavors(); + } + return BuildTargetPaths.getGenPath( + filesystem.getBuckPaths(), target, "%s#interpreter_wrapper" + extension); + } + + private static Supplier getBinScript( + PythonPlatform pythonPlatform, + String mainModule, + RelPath linkTreeRoot, + RelPath interpreterWrapperPath, + PackageStyle packageStyle) { + return () -> { + String linkTreeRootStr = Escaper.escapeAsPythonString(linkTreeRoot.toString()); + String interpreterWrapperPathStr = + Escaper.escapeAsPythonString(interpreterWrapperPath.toString()); + return new ST( + new STGroup(), + packageStyle == PackageStyle.INPLACE + ? 
getRunInplaceResource() + : getRunInplaceLiteResource()) + .add("PYTHON", pythonPlatform.getEnvironment().getPythonPath()) + .add("PYTHON_INTERPRETER_FLAGS", pythonPlatform.getInplaceBinaryInterpreterFlags()) + .add("MODULES_DIR", linkTreeRootStr) + .add("MAIN_MODULE", Escaper.escapeAsPythonString(mainModule)) + .add("INTERPRETER_WRAPPER_REL_PATH", interpreterWrapperPathStr) + .render(); + }; + } + + @Nullable + private static Supplier getInterpreterWrapperScript( BuildRuleResolver resolver, TargetConfiguration targetConfiguration, PythonPlatform pythonPlatform, CxxPlatform cxxPlatform, - String mainModule, PythonPackageComponents components, RelPath relativeLinkTreeRoot, ImmutableSet preloadLibraries, PackageStyle packageStyle) { String relativeLinkTreeRootStr = Escaper.escapeAsPythonString(relativeLinkTreeRoot.toString()); Linker ld = cxxPlatform.getLd().resolve(resolver, targetConfiguration); + // Lite mode doesn't need an interpreter wrapper as there's no LD_PRELOADs involved. + if (packageStyle != PackageStyle.INPLACE) { + return null; + } return () -> { ST st = - new ST( - new STGroup(), - packageStyle == PackageStyle.INPLACE - ? getRunInplaceResource() - : getRunInplaceLiteResource()) + new ST(new STGroup(), getRunInplaceInterpreterWrapperResource()) .add("PYTHON", pythonPlatform.getEnvironment().getPythonPath()) - .add("MAIN_MODULE", Escaper.escapeAsPythonString(mainModule)) - .add("MODULES_DIR", relativeLinkTreeRootStr) - .add("PYTHON_INTERPRETER_FLAGS", pythonPlatform.getInplaceBinaryInterpreterFlags()); + .add("PYTHON_INTERPRETER_FLAGS", pythonPlatform.getInplaceBinaryInterpreterFlags()) + .add("MODULES_DIR", relativeLinkTreeRootStr); // Only add platform-specific values when the binary includes native libraries. 
if (components.getNativeLibraries().getComponents().isEmpty()) { @@ -187,11 +244,26 @@ public ImmutableList getBuildSteps( BuildContext context, BuildableContext buildableContext) { RelPath binPath = context.getSourcePathResolver().getCellUnsafeRelPath(getSourcePathToOutput()); buildableContext.recordArtifact(binPath.getPath()); - return ImmutableList.of( - MkdirStep.of( - BuildCellRelativePath.fromCellRelativePath( - context.getBuildCellRootPath(), getProjectFilesystem(), binPath.getParent())), - WriteFileIsolatedStep.of(script, binPath, /* executable */ true)); + ImmutableList.Builder stepsBuilder = new ImmutableList.Builder(); + stepsBuilder + .add( + MkdirStep.of( + BuildCellRelativePath.fromCellRelativePath( + context.getBuildCellRootPath(), getProjectFilesystem(), binPath.getParent()))) + .add(WriteFileIsolatedStep.of(binScript, binPath, /* executable */ true)); + + if (interpreterWrapperScript != null) { + RelPath interpreterWrapperPath = + context + .getSourcePathResolver() + .getCellUnsafeRelPath( + ExplicitBuildTargetSourcePath.of(getBuildTarget(), interpreterWrapperGenPath)); + buildableContext.recordArtifact(interpreterWrapperPath.getPath()); + stepsBuilder.add( + WriteFileIsolatedStep.of( + interpreterWrapperScript, interpreterWrapperPath, /* executable */ true)); + } + return stepsBuilder.build(); } @Override diff --git a/src/com/facebook/buck/features/python/run_inplace.py.in b/src/com/facebook/buck/features/python/run_inplace.py.in index f3da12c1f86..53a2afa6aa6 100755 --- a/src/com/facebook/buck/features/python/run_inplace.py.in +++ b/src/com/facebook/buck/features/python/run_inplace.py.in @@ -7,11 +7,6 @@ import subprocess import sys main_module = -modules_dir = -native_libs_env_var = -native_libs_dir = -native_libs_preload_env_var = -native_libs_preload = def try_resolve_possible_symlink(path): import ctypes @@ -63,26 +58,6 @@ if platform.system() == "Windows": # does *not* dereference symlinks on windows until, like, 3.8 maybe. 
dirpath = os.path.dirname(try_resolve_possible_symlink(sys.argv[0])) -env_vals_to_restore = {} -# Update the environment variable for the dynamic loader to the native -# libraries location. -if native_libs_dir is not None: - old_native_libs_dir = os.environ.get(native_libs_env_var) - os.environ[native_libs_env_var] = os.path.join(dirpath, native_libs_dir) - env_vals_to_restore[native_libs_env_var] = old_native_libs_dir - -# Update the environment variable for the dynamic loader to find libraries -# to preload. -if native_libs_preload is not None: - old_native_libs_preload = os.environ.get(native_libs_preload_env_var) - env_vals_to_restore[native_libs_preload_env_var] = old_native_libs_preload - - # On macos, preloaded libs are found via paths. - os.environ[native_libs_preload_env_var] = ":".join( - os.path.join(dirpath, native_libs_dir, l) - for l in native_libs_preload.split(":") - ) - # Allow users to decorate the main module. In normal Python invocations this # can be done by prefixing the arguments with `-m decoratingmodule`. It's not # that easy for par files. The startup script below sets up `sys.path` from @@ -128,73 +103,18 @@ if os.environ.pop("PYTHONDEBUGWITHPDB", None): initial_commands=initial_commands, ) -# Note: this full block of code will be included as the argument to Python, -# and will be the first thing that shows up in the process arguments as displayed -# by programs like ps and top. -# -# We include arg0 at the start of this comment just to make it more visible what program -# is being run in the ps and top output. -STARTUP = """\ -# {arg0!r} -# Wrap everything in a private function to prevent globals being captured by -# the `runpy._run_module_as_main` below. -def __run(): - import sys - - # We set the paths beforehand to have a minimal amount of imports before - # nuking PWD from sys.path. Otherwise, there can be problems if someone runs - # from a directory with a similarly named file, even if their code is properly - # namespaced. e.g. 
if one has foo/bar/contextlib.py and while in foo/bar runs - # `buck run foo/bar:bin`, runpy will fail as it tries to import - # foo/bar/contextlib.py. You're just out of luck if you have sys.py or os.py - - # Set `argv[0]` to the executing script. - assert sys.argv[0] == '-c' - sys.argv[0] = {arg0!r} - - # Replace the working directory with location of the modules directory. - assert sys.path[0] == '' - sys.path[0] = {pythonpath!r} - - import os - import runpy - - def setenv(var, val): - if val is None: - os.environ.pop(var, None) - else: - os.environ[var] = val - - def restoreenv(d): - for k, v in d.items(): - setenv(k, v) - - restoreenv({env_vals!r}) - {module_call} - -__run() -""".format( - arg0=sys.argv[0], - pythonpath=os.path.join(dirpath, modules_dir), - env_vals=env_vals_to_restore, - main_module=main_module, - this_file=__file__, - module_call=module_call, -) - -args = [sys.executable, "", "-c", STARTUP] - +interpreter_opts = [""] # Default to 'd' warnings, but allow users to control this via PYTHONWARNINGS # The -E causes python to ignore all PYTHON* environment vars so we have to # pass this down using the command line. warnings = os.environ.get("PYTHONWARNINGS", "d").split(",") for item in reversed(warnings): - args.insert(1, "-W{0}".format(item.strip())) + interpreter_opts.insert(0, "-W{0}".format(item.strip())) # Allow users to disable byte code generation by setting the standard environment var. # Same as above, because of -E we have to pass this down using the command line. if "PYTHONDONTWRITEBYTECODE" in os.environ: - args.insert(1, "-B") + interpreter_opts.insert(0, "-B") # Python 3.7 allows benchmarking import time with this variable. Similar issues to # PYTHONDONTWRITEBYTECODE above. If using an earlier version of python... 
dont set this @@ -205,30 +125,17 @@ if ( and platform.python_implementation() == "CPython" and (sys.version_info[0], sys.version_info[1]) >= (3, 7) ): - args[1:1] = ["-X", "importtime"] + interpreter_opts[0:0] = ["-X", "importtime"] -if platform.system() == "Windows": - # exec on Windows is not true exec - there is only 'spawn' ('CreateProcess'). - # However, creating processes unnecessarily is painful, so we only do the spawn - # path if we have to, which is on Windows. That said, this complicates signal - # handling, so we need to set up some signal forwarding logic. - - p = subprocess.Popen(args + sys.argv[1:]) - - def handler(signum, frame): - # If we're getting this, we need to forward signum to subprocesses - if signum == signal.SIGINT: - p.send_signal(signal.CTRL_C_EVENT) - elif signum == signal.SIGBREAK: - p.send_signal(signal.CTRL_BREAK_EVENT) - else: - # shouldn't happen, we should be killed instead - p.terminate() - - signal.signal(signal.SIGINT, handler) - signal.signal(signal.SIGBREAK, handler) - - p.wait() - sys.exit(p.returncode) +interpreter_wrapper_path = os.path.join(dirpath, ) +if sys.version_info >= (3, 0): + import importlib.machinery + loader = importlib.machinery.SourceFileLoader("interpreter_wrapper", interpreter_wrapper_path) + interpreter_wrapper = loader.load_module() else: - os.execv(sys.executable, args + sys.argv[1:]) + # Buck is sunsetting Python2 support. However this is still needed for some + # unit tests. + import imp + interpreter_wrapper = imp.load_source("interpreter_wrapper", interpreter_wrapper_path) + +interpreter_wrapper.exec_interpreter(dirpath, interpreter_opts, module_call, sys.argv[1:]) diff --git a/src/com/facebook/buck/features/python/run_inplace_interpreter_wrapper.py.in b/src/com/facebook/buck/features/python/run_inplace_interpreter_wrapper.py.in new file mode 100755 index 00000000000..912dcb93c06 --- /dev/null +++ b/src/com/facebook/buck/features/python/run_inplace_interpreter_wrapper.py.in @@ -0,0 +1,185 @@ +#! 
+ +import os +import platform +import signal +import subprocess +import sys + +modules_dir = +native_libs_env_var = +native_libs_dir = +native_libs_preload_env_var = +native_libs_preload = + +def exec_interpreter(dirpath, interpreter_opts, module_call, program_args): + env_vals_to_restore = {} + # Update the environment variable for the dynamic loader to the native + # libraries location. + if native_libs_dir is not None: + old_native_libs_dir = os.environ.get(native_libs_env_var) + os.environ[native_libs_env_var] = os.path.join(dirpath, native_libs_dir) + env_vals_to_restore[native_libs_env_var] = old_native_libs_dir + + # Update the environment variable for the dynamic loader to find libraries + # to preload. + if native_libs_preload is not None: + old_native_libs_preload = os.environ.get(native_libs_preload_env_var) + env_vals_to_restore[native_libs_preload_env_var] = old_native_libs_preload + + # On macos, preloaded libs are found via paths. + os.environ[native_libs_preload_env_var] = ":".join( + os.path.join(dirpath, native_libs_dir, l) + for l in native_libs_preload.split(":") + ) + + # Note: this full block of code will be included as the argument to Python, + # and will be the first thing that shows up in the process arguments as displayed + # by programs like ps and top. + # + # We include arg0 at the start of this comment just to make it more visible what program + # is being run in the ps and top output. + startup = """\ +# {arg0!r} +# Wrap everything in a private function to prevent globals being captured by +# the `runpy._run_module_as_main` below. +def __run(): + import platform + import sys + + # We set the paths beforehand to have a minimal amount of imports before + # nuking PWD from sys.path. Otherwise, there can be problems if someone runs + # from a directory with a similarly named file, even if their code is properly + # namespaced. e.g. 
if one has foo/bar/contextlib.py and while in foo/bar runs + # `buck run foo/bar:bin`, runpy will fail as it tries to import + # foo/bar/contextlib.py. You're just out of luck if you have sys.py or os.py + + # Set `argv[0]` to the executing script. + assert sys.argv[0] == '-c' + sys.argv[0] = {arg0!r} + + # Use the interpreter wrapper as the mp executable so native libraries can + # be loaded correctly for spawned processes. + if sys.version_info >= (3, 0) and platform.system() == "Linux": + import multiprocessing + context = multiprocessing.get_context("spawn") + context.set_executable({mp_executable!r}) + # `spawn_main` expects conventional `argv`. + if len(sys.argv) >= 3 and sys.argv[2] == "--multiprocessing-fork": + sys.argv = ["-c"] + sys.argv[2:] + + # Replace the working directory with location of the modules directory. + assert sys.path[0] == '' + sys.path[0] = {pythonpath!r} + + import os + import runpy + + def setenv(var, val): + if val is None: + os.environ.pop(var, None) + else: + os.environ[var] = val + + def restoreenv(d): + for k, v in d.items(): + setenv(k, v) + + restoreenv({env_vals_to_restore!r}) + {module_call} + +__run() + """.format( + arg0=sys.argv[0], + mp_executable=__file__, + pythonpath=os.path.join(dirpath, modules_dir), + env_vals_to_restore=env_vals_to_restore, + module_call=module_call, + ) + + interpreter_args = interpreter_opts + ["-c", startup] + program_args + + if platform.system() == "Windows": + # exec on Windows is not true exec - there is only 'spawn' ('CreateProcess'). + # However, creating processes unnecessarily is painful, so we only do the spawn + # path if we have to, which is on Windows. That said, this complicates signal + # handling, so we need to set up some signal forwarding logic. 
+ p = subprocess.Popen([sys.executable] + interpreter_args) + + def handler(signum, frame): + # If we're getting this, we need to forward signum to subprocesses + if signum == signal.SIGINT: + p.send_signal(signal.CTRL_C_EVENT) + elif signum == signal.SIGBREAK: + p.send_signal(signal.CTRL_BREAK_EVENT) + else: + # shouldn't happen, we should be killed instead + p.terminate() + + signal.signal(signal.SIGINT, handler) + signal.signal(signal.SIGBREAK, handler) + + p.wait() + sys.exit(p.returncode) + else: + os.execv(sys.executable, [sys.executable] + interpreter_args) + + +if __name__ == "__main__": + """ + Mimics Python interpreter's CLI. Sets up `LD_PRELOAD` for system native + dependencies and injects neccessary prologues to the program before + `execv`-ing the real Python interpreter. + + NOTE: currently the only dependent of this entrypoint is multiprocessing's + spawn method on Linux, which uses the script as the executable for child + processes. The entrypoint closely resembles Python interpreter's CLI and + can be used as an interpreter with access to the build target's native + dependencies. However, for simplicity, it doesn't handle some uncommon ways + for specifying interpreter arguments (e.g. -Ec [command]). + """ + interpreter_opts = [] + module_call = None + program_args = [] + + # Parse the command line arguments to separate Python options and program + # arguments. Identify the program (i.e. one of -m, -c, or script path) so + # it can be deferred after the prologue. + # According to https://docs.python.org/3/using/cmdline.html, all arguments + # followed by the first occurrence of (1) -c [command] (2) -m [module-name] + # (3) [script] are program arguments. 
+    argv = sys.argv[1:]
+    for opt_idx, opt in enumerate(argv):
+        if not opt.startswith("-"):
+            if (
+                opt_idx > 0
+                and argv[opt_idx - 1].startswith("-")
+                and argv[opt_idx - 1].endswith(("W", "X"))
+            ):
+                interpreter_opts.append(opt)
+                continue
+            # Encountered positional argument before encountering "-c" or "-m".
+            # This means that the argument is a script path.
+            module_call = "runpy.run_path({script!r})".format(script=opt)
+            program_args = argv[opt_idx:]
+            break
+        elif opt == "-":
+            raise RuntimeError("The interpreter wrapper doesn't support reading from stdin.")
+        elif opt == "-c":
+            module_call = argv[opt_idx + 1]
+            program_args = argv[opt_idx + 1:]
+            break
+        elif opt == "-m":
+            module_call = "runpy._run_module_as_main({main_module!r}, False)".format(
+                main_module=argv[opt_idx + 1]
+            )
+            program_args = argv[opt_idx + 1:]  # opt_idx indexes argv, not sys.argv; slicing sys.argv would leak "-m"
+            break
+        else:
+            interpreter_opts.append(opt)
+
+    if module_call is None:
+        module_call = "import code; code.interact()"
+
+    dirpath = os.path.dirname(os.path.realpath(__file__))
+    exec_interpreter(dirpath, interpreter_opts, module_call, program_args)