From 9610ae889e6fd45280c5beb7fe8f5bef2d736878 Mon Sep 17 00:00:00 2001 From: Alex Torok Date: Tue, 12 Apr 2022 12:16:52 -0700 Subject: [PATCH] Update PythonZipper action to use CommandLineItem.CapturingMapFn ### Summary This PR attempts to address https://github.com/bazelbuild/bazel/issues/14890. This updates the PythonZipper action to use a `CommandLineItem.CapturingMapFn` to defer expanding arguments until after analysis. I used `CapturingMapFn`, as it seemed like the only way to go about implementing the arg filtering that is done by the current code to exclude the `executable` and `zipFile` artifacts. ### Test Plan Use the example from https://github.com/bazelbuild/bazel/issues/14890 of looking at memory usage in tensorflow before/after this patch is applied on top of version `5.0.0` Initial Setup ``` git clone git@github.com:tensorflow/tensorflow.git cd tensorflow python3 -m venv venv source venv/bin/activate pip install numpy ``` View memory usage at 5.0.0: ```bash # STARTUP_FLAGS from https://bazel.build/rules/performance#enabling-memory-tracking $ bazel ${STARTUP_FLAGS[@]} build --nobuild //tensorflow/python/... $ bazel ${STARTUP_FLAGS[@]} dump --rules | egrep '(RULE|py_)' RULE COUNT ACTIONS BYTES EACH py_test 1,714 15,390 413,338,632 241,154 py_library 1,102 0 2,097,152 1,903 py_binary 33 198 8,914,840 270,146 py_runtime 6 0 0 0 py_runtime_pair 3 0 0 0 _concat_flatbuffer_py_srcs 2 2 0 0 $ bazel ${STARTUP_FLAGS[@]} info used-heap-size-after-gc 635MB ``` View memory usage at 5.0.0 with this patch applied: ```bash $ ~/code/bazel/bazel-bin/src/bazel ${STARTUP_FLAGS[@]} dump --rules | egrep '(RULE|py_)' RULE COUNT ACTIONS BYTES EACH py_test 1,714 15,390 65,323,312 38,111 py_library 1,102 0 2,359,576 2,141 py_binary 33 198 524,400 15,890 py_runtime 6 0 0 0 py_runtime_pair 3 0 0 0 _concat_flatbuffer_py_srcs 2 2 0 0 $ ~/code/bazel/bazel-bin/src/bazel ${STARTUP_FLAGS[@]} info used-heap-size-after-gc 283MB ``` Ensure that the generated actions have not changed: ```bash $ bazel aquery --build_python_zip 'mnemonic("PythonZipper", //tensorflow/python/...)' > /tmp/bazel_5_out $ ~/code/bazel/bazel-bin/src/bazel aquery --build_python_zip 'mnemonic("PythonZipper", //tensorflow/python/...)' > /tmp/bazel_5_patched_out $ diff /tmp/bazel_5_out /tmp/bazel_5_patched_out # note: no diff output ``` Closes #15037. PiperOrigin-RevId: 441257695 --- .../build/lib/bazel/rules/python/BUILD | 1 + .../rules/python/BazelPythonSemantics.java | 21 +++++++++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/python/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/rules/python/BUILD index dbcd04e8b0687b..26da7689d97889 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/rules/python/BUILD +++ b/src/main/java/com/google/devtools/build/lib/bazel/rules/python/BUILD @@ -22,6 +22,7 @@ java_library( deps = [ "//src/main/java/com/google/devtools/build/lib/actions", "//src/main/java/com/google/devtools/build/lib/actions:artifacts", + "//src/main/java/com/google/devtools/build/lib/actions:commandline_item", "//src/main/java/com/google/devtools/build/lib/analysis:actions/custom_command_line", "//src/main/java/com/google/devtools/build/lib/analysis:actions/launcher_file_write_action", "//src/main/java/com/google/devtools/build/lib/analysis:actions/substitution", diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/python/BazelPythonSemantics.java b/src/main/java/com/google/devtools/build/lib/bazel/rules/python/BazelPythonSemantics.java index a76ba940ad72d9..94ef431c5e10f3 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/rules/python/BazelPythonSemantics.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/rules/python/BazelPythonSemantics.java @@ -20,6 +20,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.devtools.build.lib.actions.Artifact; +import com.google.devtools.build.lib.actions.CommandLineItem; import com.google.devtools.build.lib.actions.ParamFileInfo; import com.google.devtools.build.lib.actions.ParameterFile; import com.google.devtools.build.lib.analysis.AnalysisUtils; @@ -367,12 +368,24 @@ private static void createPythonZipAction( // Read each runfile from execute path, add them into zip file at the right runfiles path. // Filter the executable file, cause we are building it. + argv.addAll( + CustomCommandLine.VectorArg.of(runfilesSupport.getRunfilesArtifacts()) + .mapped( + (CommandLineItem.CapturingMapFn) + (artifact, args) -> { + if (!artifact.equals(executable) && !artifact.equals(zipFile)) { + args.accept( + getZipRunfilesPath( + artifact.getRunfilesPath(), + workspaceName, + legacyExternalRunfiles) + + "=" + + artifact.getExecPathString()); + } + })); + for (Artifact artifact : runfilesSupport.getRunfilesArtifacts().toList()) { if (!artifact.equals(executable) && !artifact.equals(zipFile)) { - argv.addDynamicString( - getZipRunfilesPath(artifact.getRunfilesPath(), workspaceName, legacyExternalRunfiles) - + "=" - + artifact.getExecPathString()); inputsBuilder.add(artifact); } }