From 9d0163002ca63f4cdbaff9420380d72e2a6e38b3 Mon Sep 17 00:00:00 2001
From: Adam Azarchs <adam.azarchs@10xgenomics.com>
Date: Fri, 23 Sep 2022 02:18:23 -0700
Subject: [PATCH] rules/python: Add a `coverage_tool` attribute to
 `py_runtime`.

This allows users to specify a target providing the coveragepy tool (and its dependencies).  This is essential for hermetic python builds, where an absolute path will not really work.  It's also superior to other potential methods using environment variables because the runfiles dependency on the coverage tool and its files is only incurred when building with coverage enabled.

This also builds on the work @TLATER began with https://github.com/bazelbuild/bazel/pull/14677 to integrate with `coveragepy`'s `lcov` support, with an additional step of at least attempting to convert the absolute paths which `coveragepy` uses in the lcov output into the relative paths which the rest of bazel can actually consume.

This is my first time touching Java code professionally, so I'll admit to mostly cargo-culting those parts, and would welcome any feedback on how to improve things there.  I also would have no objections to someone else taking over this PR to get it over the finish line.  I've tested this out with our own team's internal monorepo, and have successfully generated a full combined coverage report for most of our python and go code.  There's still a bunch of things which don't quite work, in particular when it comes to compiled extension modules or executables run from within python tests, but those will need to be addressed separately, and this is already a giant step forward for our team.

Closes https://github.com/bazelbuild/bazel/issues/14436.

Closes #15590.

PiperOrigin-RevId: 476314433
Change-Id: I4be4d10e0af741f4ba1a7b5367c6f7a338a3c43d
---
 site/en/configure/coverage.md                 |  55 +++-
 .../rules/python/BazelPythonSemantics.java    |  37 +++
 .../rules/python/python_stub_template.txt     | 289 +++++++++++++++---
 .../build/lib/rules/python/PyRuntime.java     |  36 ++-
 .../build/lib/rules/python/PyRuntimeInfo.java |  64 +++-
 .../build/lib/rules/python/PyRuntimeRule.java |  16 +
 .../python/PyRuntimeInfoApi.java              |  45 +++
 .../lib/rules/python/PyRuntimeInfoTest.java   |   5 +-
 src/test/shell/bazel/BUILD                    |   9 +
 .../bazel/bazel_coverage_hermetic_py_test.sh  | 174 +++++++++++
 10 files changed, 680 insertions(+), 50 deletions(-)
 create mode 100755 src/test/shell/bazel/bazel_coverage_hermetic_py_test.sh

diff --git a/site/en/configure/coverage.md b/site/en/configure/coverage.md
index 76828f435bd313..4f546df03c3d3e 100644
--- a/site/en/configure/coverage.md
+++ b/site/en/configure/coverage.md
@@ -188,8 +188,61 @@ py_test(
     ],
 )
 ```
-<!-- TODO: Allow specifying a target for `PYTHON_COVERAGE`, instead of having to use `$(location)` -->
 
+If you are using a hermetic Python toolchain, rather than adding the coverage
+dependency to every `py_test` target you can add the coverage tool to
+the toolchain configuration.
+
+Because the [pip_install][pip_install_rule] rule depends on the Python
+toolchain, it cannot be used to fetch the `coverage` module.
+Instead, add in your `WORKSPACE` e.g.
+
+```starlark
+http_archive(
+    name = "coverage_linux_x86_64",
+    build_file_content = """
+py_library(
+    name = "coverage",
+    srcs = ["coverage/__main__.py"],
+    data = glob(["coverage/*", "coverage/**/*.py"]),
+    visibility = ["//visibility:public"],
+)
+""",
+    sha256 = "84631e81dd053e8a0d4967cedab6db94345f1c36107c71698f746cb2636c63e3",
+    type = "zip",
+    urls = [
+        "https://files.pythonhosted.org/packages/74/0d/0f3c522312fd27c32e1abe2fb5c323b583a5c108daf2c26d6e8dfdd5a105/coverage-6.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
+    ],
+)
+```
+
+Then configure your python toolchain as e.g.
+
+```starlark
+py_runtime(
+    name = "py3_runtime_linux_x86_64",
+    coverage_tool = "@coverage_linux_x86_64//:coverage",
+    files = ["@python3_9_x86_64-unknown-linux-gnu//:files"],
+    interpreter = "@python3_9_x86_64-unknown-linux-gnu//:bin/python3",
+    python_version = "PY3",
+)
+
+py_runtime_pair(
+    name = "python_runtimes_linux_x86_64",
+    py2_runtime = None,
+    py3_runtime = ":py3_runtime_linux_x86_64",
+)
+
+toolchain(
+    name = "python_toolchain_linux_x86_64",
+    exec_compatible_with = [
+        "@platforms//os:linux",
+        "@platforms//cpu:x86_64",
+    ],
+    toolchain = ":python_runtimes_linux_x86_64",
+    toolchain_type = "@bazel_tools//tools/python:toolchain_type",
+)
+```
 
 [lcov]: https://github.com/linux-test-project/lcov
 [rules_python]: https://github.com/bazelbuild/rules_python
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/python/BazelPythonSemantics.java b/src/main/java/com/google/devtools/build/lib/bazel/rules/python/BazelPythonSemantics.java
index 5f94e4b114451c..9e6683b30f703b 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/rules/python/BazelPythonSemantics.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/rules/python/BazelPythonSemantics.java
@@ -92,12 +92,20 @@ public boolean prohibitHyphensInPackagePaths() {
   public void collectRunfilesForBinary(
       RuleContext ruleContext, Runfiles.Builder builder, PyCommon common, CcInfo ccInfo) {
     addRuntime(ruleContext, common, builder);
+    // select() and build configuration should ideally remove coverage as
+    // a dependency, but guard against including it at runtime just in case.
+    if (ruleContext.getConfiguration().isCodeCoverageEnabled()) {
+      addCoverageSupport(ruleContext, common, builder);
+    }
   }
 
   @Override
   public void collectDefaultRunfilesForBinary(
       RuleContext ruleContext, PyCommon common, Runfiles.Builder builder) {
     addRuntime(ruleContext, common, builder);
+    if (ruleContext.getConfiguration().isCodeCoverageEnabled()) {
+      addCoverageSupport(ruleContext, common, builder);
+    }
   }
 
   @Override
@@ -154,6 +162,9 @@ private static void createStubFile(
     // first-stage.
     String pythonBinary = getPythonBinary(ruleContext, common, bazelConfig);
 
+    // The python code coverage tool to use, if any.
+    String coverageTool = getCoverageTool(ruleContext, common);
+
     // Version information for host config diagnostic warning.
     PythonVersion attrVersion = PyCommon.readPythonVersionFromAttribute(ruleContext.attributes());
     boolean attrVersionSpecifiedExplicitly = attrVersion != null;
@@ -172,6 +183,7 @@ private static void createStubFile(
                 Substitution.of(
                     "%main%", common.determineMainExecutableSource(/*withWorkspaceName=*/ true)),
                 Substitution.of("%python_binary%", pythonBinary),
+                Substitution.of("%coverage_tool%", coverageTool == null ? "" : coverageTool),
                 Substitution.of("%imports%", Joiner.on(":").join(common.getImports().toList())),
                 Substitution.of("%workspace_name%", ruleContext.getWorkspaceName()),
                 Substitution.of("%is_zipfile%", boolToLiteral(isForZipFile)),
@@ -461,6 +473,31 @@ private static String getPythonBinary(
     return pythonBinary;
   }
 
+  private static void addCoverageSupport(
+      RuleContext ruleContext, PyCommon common, Runfiles.Builder builder) {
+    PyRuntimeInfo provider = getRuntime(ruleContext, common);
+    if (provider != null && provider.getCoverageTool() != null) {
+      builder.addArtifact(provider.getCoverageTool());
+      builder.addTransitiveArtifacts(provider.getCoverageToolFiles());
+    }
+  }
+
+  @Nullable
+  private static String getCoverageTool(RuleContext ruleContext, PyCommon common) {
+    if (!ruleContext.getConfiguration().isCodeCoverageEnabled()) {
+      return null;
+    }
+    String coverageTool = null;
+    PyRuntimeInfo provider = getRuntime(ruleContext, common);
+    if (provider != null && provider.getCoverageTool() != null) {
+      PathFragment workspaceName =
+          PathFragment.create(ruleContext.getRule().getPackage().getWorkspaceName());
+      coverageTool =
+          workspaceName.getRelative(provider.getCoverageTool().getRunfilesPath()).getPathString();
+    }
+    return coverageTool;
+  }
+
   private static String getStubShebang(RuleContext ruleContext, PyCommon common) {
     PyRuntimeInfo provider = getRuntime(ruleContext, common);
     if (provider != null) {
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/python/python_stub_template.txt b/src/main/java/com/google/devtools/build/lib/bazel/rules/python/python_stub_template.txt
index eb31c8dd1e4a48..e539c7b4c123ae 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/rules/python/python_stub_template.txt
+++ b/src/main/java/com/google/devtools/build/lib/bazel/rules/python/python_stub_template.txt
@@ -86,20 +86,44 @@ def SearchPath(name):
 
 def FindPythonBinary(module_space):
   """Finds the real Python binary if it's not a normal absolute path."""
-  if PYTHON_BINARY.startswith('//'):
+  return FindBinary(module_space, PYTHON_BINARY)
+
+def PrintVerboseCoverage(*args):
+  """Print output if VERBOSE_COVERAGE is non-empty in the environment."""
+  if os.environ.get("VERBOSE_COVERAGE"):
+    print(*args, file=sys.stderr)
+
+def FindCoverageEntryPoint(module_space):
+  cov_tool = '%coverage_tool%'
+  if cov_tool:
+    PrintVerboseCoverage('Using toolchain coverage_tool %r' % cov_tool)
+  else:
+    cov_tool = os.environ.get('PYTHON_COVERAGE')
+    if cov_tool:
+      PrintVerboseCoverage('PYTHON_COVERAGE: %r' % cov_tool)
+  if cov_tool:
+    return FindBinary(module_space, cov_tool)
+  return None
+
+def FindBinary(module_space, bin_name):
+  """Finds the real binary if it's not a normal absolute path."""
+  if not bin_name:
+    return None
+  if bin_name.startswith("//"):
     # Case 1: Path is a label. Not supported yet.
     raise AssertionError(
-      'Bazel does not support execution of Python interpreters via labels yet')
-  elif os.path.isabs(PYTHON_BINARY):
+        "Bazel does not support execution of Python interpreters via labels yet"
+    )
+  elif os.path.isabs(bin_name):
     # Case 2: Absolute path.
-    return PYTHON_BINARY
+    return bin_name
   # Use normpath() to convert slashes to os.sep on Windows.
-  elif os.sep in os.path.normpath(PYTHON_BINARY):
+  elif os.sep in os.path.normpath(bin_name):
     # Case 3: Path is relative to the repo root.
-    return os.path.join(module_space, PYTHON_BINARY)
+    return os.path.join(module_space, bin_name)
   else:
     # Case 4: Path has to be looked up in the search path.
-    return SearchPath(PYTHON_BINARY)
+    return SearchPath(bin_name)
 
 def CreatePythonPathEntries(python_imports, module_space):
   parts = python_imports.split(':')
@@ -213,6 +237,177 @@ def Deduplicate(items):
           seen.add(it)
           yield it
 
+def InstrumentedFilePaths():
+  """Yields tuples of realpath of each instrumented file with the relative path."""
+  manifest_filename = os.environ.get('COVERAGE_MANIFEST')
+  if not manifest_filename:
+    return
+  with open(manifest_filename, "r") as manifest:
+    for line in manifest:
+      filename = line.strip()
+      if not filename:
+        continue
+      try:
+        realpath = os.path.realpath(filename)
+      except OSError:
+        print(
+          "Could not find instrumented file {}".format(filename),
+          file=sys.stderr)
+        continue
+      if realpath != filename:
+        PrintVerboseCoverage("Fixing up {} -> {}".format(realpath, filename))
+        yield (realpath, filename)
+
+def UnresolveSymlinks(output_filename):
+  # type: (str) -> None
+  """Replace realpath of instrumented files with the relative path in the lcov output.
+
+  Though we are asking coveragepy to use relative file names, it currently
+  ignores that for purposes of generating the lcov report (and other reports
+  which are not the XML report), so we need to go and fix up the report.
+
+  This function is a workaround for that issue. Once that issue is fixed
+  upstream and the updated version is widely in use, this should be removed.
+
+  See https://github.com/nedbat/coveragepy/issues/963.
+  """
+  substitutions = list(InstrumentedFilePaths())
+  if substitutions:
+    unfixed_file = output_filename + '.tmp'
+    os.rename(output_filename, unfixed_file)
+    with open(unfixed_file, "r") as unfixed:
+      with open(output_filename, "w") as output_file:
+        for line in unfixed:
+          if line.startswith('SF:'):
+            for (realpath, filename) in substitutions:
+              line = line.replace(realpath, filename)
+          output_file.write(line)
+    os.unlink(unfixed_file)
+
+def ExecuteFile(python_program, main_filename, args, env, module_space,
+                coverage_entrypoint, workspace):
+  # type: (str, str, list[str], dict[str, str], str, str|None, str|None) -> ...
+  """Executes the given Python file using the various environment settings.
+
+  This will not return, and acts much like os.execv, except is much
+  more restricted, and handles Bazel-related edge cases.
+
+  Args:
+    python_program: (str) Path to the Python binary to use for execution
+    main_filename: (str) The Python file to execute
+    args: (list[str]) Additional args to pass to the Python file
+    env: (dict[str, str]) A dict of environment variables to set for the execution
+    module_space: (str) Path to the module space/runfiles tree directory
+    coverage_entrypoint: (str|None) Path to the coverage tool entry point file.
+    workspace: (str|None) Name of the workspace to execute in. This is expected to be a
+        directory under the runfiles tree, and will recursively delete the
+        runfiles directory if set.
+  """
+  # We want to use os.execv instead of subprocess.call, which causes
+  # problems with signal passing (making it difficult to kill
+  # Bazel). However, these conditions force us to run via
+  # subprocess.call instead:
+  #
+  # - On Windows, os.execv doesn't handle arguments with spaces
+  #   correctly, and it actually starts a subprocess just like
+  #   subprocess.call.
+  # - When running in a workspace (i.e., if we're running from a zip),
+  #   we need to clean up the workspace after the process finishes so
+  #   control must return here.
+  # - If we may need to emit a host config warning after execution, we
+  #   can't execv because we need control to return here. This only
+  #   happens for targets built in the host config.
+  # - For coverage targets, at least coveragepy requires running in
+  #   two invocations, which also requires control to return here.
+  #
+  if not (IsWindows() or workspace or coverage_entrypoint):
+    _RunExecv(python_program, main_filename, args, env)
+
+  if coverage_entrypoint is not None:
+    ret_code = _RunForCoverage(python_program, main_filename, args, env,
+                               coverage_entrypoint, workspace)
+  else:
+    ret_code = subprocess.call(
+      [python_program, main_filename] + args,
+      env=env,
+      cwd=workspace
+    )
+
+  if workspace:
+    shutil.rmtree(os.path.dirname(module_space), True)
+  sys.exit(ret_code)
+
+def _RunExecv(python_program, main_filename, args, env):
+  # type: (str, str, list[str], dict[str, str]) -> ...
+  """Executes the given Python file using the various environment settings."""
+  os.environ.update(env)
+  os.execv(python_program, [python_program, main_filename] + args)
+
+def _RunForCoverage(python_program, main_filename, args, env,
+                    coverage_entrypoint, workspace):
+  # type: (str, str, list[str], dict[str, str], str, str|None) -> int
+  """Collects coverage information for the given Python file.
+
+  Args:
+    python_program: (str) Path to the Python binary to use for execution
+    main_filename: (str) The Python file to execute
+    args: (list[str]) Additional args to pass to the Python file
+    env: (dict[str, str]) A dict of environment variables to set for the execution
+    coverage_entrypoint: (str|None) Path to the coverage entry point to execute with.
+    workspace: (str|None) Name of the workspace to execute in. This is expected to be a
+        directory under the runfiles tree, and will recursively delete the
+        runfiles directory if set.
+  """
+  # We need for coveragepy to use relative paths.  This can only be configured
+  # via an rc file, so we need to make one.
+  rcfile_name = os.path.join(os.environ['COVERAGE_DIR'], '.coveragerc')
+  with open(rcfile_name, "w") as rcfile:
+    rcfile.write('''[run]
+relative_files = True
+''')
+  PrintVerboseCoverage('Coverage entrypoint:', coverage_entrypoint)
+  # First run the target Python file via coveragepy to create a .coverage
+  # database file, from which we can later export lcov.
+  ret_code = subprocess.call(
+    [
+      python_program,
+      coverage_entrypoint,
+      "run",
+      "--rcfile=" + rcfile_name,
+      "--append",
+      "--branch",
+      main_filename
+    ] + args,
+    env=env,
+    cwd=workspace
+  )
+  output_filename = os.path.join(os.environ['COVERAGE_DIR'], 'pylcov.dat')
+
+  PrintVerboseCoverage('Converting coveragepy database to lcov:', output_filename)
+  # Run coveragepy again to convert its .coverage database file into lcov.
+  ret_code = subprocess.call(
+    [
+      python_program,
+      coverage_entrypoint,
+      "lcov",
+      "--rcfile=" + rcfile_name,
+      "-o",
+      output_filename
+    ],
+    env=env,
+    cwd=workspace
+  ) or ret_code
+  try:
+    os.unlink(rcfile_name)
+  except OSError as err:
+    # It's possible that the profiled program might execute another Python
+    # binary through a wrapper that would then delete the rcfile.  Not much
+    # we can do about that, besides ignore the failure here.
+    PrintVerboseCoverage('Error removing temporary coverage rc file:', err)
+  if os.path.isfile(output_filename):
+    UnresolveSymlinks(output_filename)
+  return ret_code
+
 def Main():
   args = sys.argv[1:]
 
@@ -269,47 +464,55 @@ def Main():
   if python_program is None:
     raise AssertionError('Could not find python binary: ' + PYTHON_BINARY)
 
-  cov_tool = os.environ.get('PYTHON_COVERAGE')
-  if cov_tool:
-    # Inhibit infinite recursion:
-    del os.environ['PYTHON_COVERAGE']
-    if not os.path.exists(cov_tool):
-      raise EnvironmentError('Python coverage tool %s not found.' % cov_tool)
-    args = [python_program, cov_tool, 'run', '-a', '--branch', main_filename] + args
-    # coverage library expects sys.path[0] to contain the library, and replaces
-    # it with the directory of the program it starts. Our actual sys.path[0] is
-    # the runfiles directory, which must not be replaced.
-    # CoverageScript.do_execute() undoes this sys.path[0] setting.
-    #
-    # Update sys.path such that python finds the coverage package. The coverage
-    # entry point is coverage.coverage_main, so we need to do twice the dirname.
-    new_env['PYTHONPATH'] = \
-        new_env['PYTHONPATH'] + ':' + os.path.dirname(os.path.dirname(cov_tool))
-    new_env['PYTHON_LCOV_FILE'] = os.environ.get('COVERAGE_DIR') + '/pylcov.dat'
+  # COVERAGE_DIR is set if coverage is enabled and instrumentation is configured
+  # for something, though it could be another program executing this one or
+  # one executed by this one (e.g. an extension module).
+  if os.environ.get('COVERAGE_DIR'):
+    cov_tool = FindCoverageEntryPoint(module_space)
+    if cov_tool is None:
+      PrintVerboseCoverage('Coverage was enabled, but python coverage tool was not configured.')
+    else:
+      # Inhibit infinite recursion:
+      if 'PYTHON_COVERAGE' in os.environ:
+        del os.environ['PYTHON_COVERAGE']
+
+      if not os.path.exists(cov_tool):
+        raise EnvironmentError(
+          'Python coverage tool %r not found. '
+          'Try running with VERBOSE_COVERAGE=1 to collect more information.'
+          % cov_tool
+        )
+
+      # coverage library expects sys.path[0] to contain the library, and replaces
+      # it with the directory of the program it starts. Our actual sys.path[0] is
+      # the runfiles directory, which must not be replaced.
+      # CoverageScript.do_execute() undoes this sys.path[0] setting.
+      #
+      # Update sys.path such that python finds the coverage package. The coverage
+      # entry point is coverage.coverage_main, so we need to do twice the dirname.
+      python_path_entries = new_env['PYTHONPATH'].split(os.pathsep)
+      python_path_entries.append(os.path.dirname(os.path.dirname(cov_tool)))
+      new_env['PYTHONPATH'] = os.pathsep.join(Deduplicate(python_path_entries))
   else:
-    args = [python_program, main_filename] + args
+    cov_tool = None
+
+  new_env.update((key, val) for key, val in os.environ.items() if key not in new_env)
 
-  os.environ.update(new_env)
+  workspace = None
+  if IsRunningFromZip():
+    # If RUN_UNDER_RUNFILES equals 1, it means we need to
+    # change directory to the right runfiles directory.
+    # (So that the data files are accessible)
+    if os.environ.get('RUN_UNDER_RUNFILES') == '1':
+      workspace = os.path.join(module_space, '%workspace_name%')
 
   try:
     sys.stdout.flush()
-    if IsRunningFromZip():
-      # If RUN_UNDER_RUNFILES equals 1, it means we need to
-      # change directory to the right runfiles directory.
-      # (So that the data files are accessible)
-      if os.environ.get('RUN_UNDER_RUNFILES') == '1':
-        os.chdir(os.path.join(module_space, '%workspace_name%'))
-      ret_code = subprocess.call(args)
-      shutil.rmtree(os.path.dirname(module_space), True)
-      sys.exit(ret_code)
-    else:
-      # On Windows, os.execv doesn't handle arguments with spaces correctly,
-      # and it actually starts a subprocess just like subprocess.call.
-      if IsWindows():
-        ret_code = subprocess.call(args)
-        sys.exit(ret_code)
-      else:
-        os.execv(args[0], args)
+    ExecuteFile(
+      python_program, main_filename, args, new_env, module_space,
+      cov_tool, workspace
+    )
+
   except EnvironmentError:
     # This works from Python 2.4 all the way to 3.x.
     e = sys.exc_info()[1]
diff --git a/src/main/java/com/google/devtools/build/lib/rules/python/PyRuntime.java b/src/main/java/com/google/devtools/build/lib/rules/python/PyRuntime.java
index fdcad28d301779..5046be6ea52368 100644
--- a/src/main/java/com/google/devtools/build/lib/rules/python/PyRuntime.java
+++ b/src/main/java/com/google/devtools/build/lib/rules/python/PyRuntime.java
@@ -18,12 +18,16 @@
 import com.google.devtools.build.lib.actions.Artifact;
 import com.google.devtools.build.lib.actions.MutableActionGraph.ActionConflictException;
 import com.google.devtools.build.lib.analysis.ConfiguredTarget;
+import com.google.devtools.build.lib.analysis.FileProvider;
+import com.google.devtools.build.lib.analysis.FilesToRunProvider;
 import com.google.devtools.build.lib.analysis.PrerequisiteArtifacts;
 import com.google.devtools.build.lib.analysis.RuleConfiguredTargetBuilder;
 import com.google.devtools.build.lib.analysis.RuleConfiguredTargetFactory;
 import com.google.devtools.build.lib.analysis.RuleContext;
 import com.google.devtools.build.lib.analysis.RunfilesProvider;
+import com.google.devtools.build.lib.analysis.TransitiveInfoCollection;
 import com.google.devtools.build.lib.collect.nestedset.NestedSet;
+import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder;
 import com.google.devtools.build.lib.packages.Type;
 import com.google.devtools.build.lib.vfs.PathFragment;
 import javax.annotation.Nullable;
@@ -61,6 +65,32 @@ public ConfiguredTarget create(RuleContext ruleContext)
       ruleContext.attributeError("interpreter_path", "must be an absolute path.");
     }
 
+    Artifact coverageTool = null;
+    NestedSet<Artifact> coverageFiles = null;
+    TransitiveInfoCollection coverageTarget = ruleContext.getPrerequisite("coverage_tool");
+    if (coverageTarget != null) {
+      NestedSet<Artifact> coverageToolFiles =
+          coverageTarget.getProvider(FileProvider.class).getFilesToBuild();
+      if (coverageToolFiles.isSingleton()) {
+        coverageTool = coverageToolFiles.getSingleton();
+      } else {
+        FilesToRunProvider filesToRun = coverageTarget.getProvider(FilesToRunProvider.class);
+        if (filesToRun == null) {
+          ruleContext.attributeError(
+              "coverage_tool", "must be an executable target or must produce exactly one file.");
+        } else {
+          coverageTool = filesToRun.getExecutable();
+        }
+      }
+      NestedSetBuilder<Artifact> result = NestedSetBuilder.stableOrder();
+      result.addTransitive(coverageToolFiles);
+      RunfilesProvider runfilesProvider = coverageTarget.getProvider(RunfilesProvider.class);
+      if (runfilesProvider != null) {
+        result.addTransitive(runfilesProvider.getDefaultRunfiles().getArtifacts());
+      }
+      coverageFiles = result.build();
+    }
+
     if (pythonVersion == PythonVersion._INTERNAL_SENTINEL) {
       if (pyConfig.useToolchains()) {
         ruleContext.attributeError(
@@ -83,8 +113,10 @@ public ConfiguredTarget create(RuleContext ruleContext)
 
     PyRuntimeInfo provider =
         hermetic
-            ? PyRuntimeInfo.createForInBuildRuntime(interpreter, files, pythonVersion, stubShebang)
-            : PyRuntimeInfo.createForPlatformRuntime(interpreterPath, pythonVersion, stubShebang);
+            ? PyRuntimeInfo.createForInBuildRuntime(
+                interpreter, files, coverageTool, coverageFiles, pythonVersion, stubShebang)
+            : PyRuntimeInfo.createForPlatformRuntime(
+                interpreterPath, coverageTool, coverageFiles, pythonVersion, stubShebang);
 
     return new RuleConfiguredTargetBuilder(ruleContext)
         .setFilesToBuild(files)
diff --git a/src/main/java/com/google/devtools/build/lib/rules/python/PyRuntimeInfo.java b/src/main/java/com/google/devtools/build/lib/rules/python/PyRuntimeInfo.java
index 2e2f1ad0eeb69c..5d812965b67075 100644
--- a/src/main/java/com/google/devtools/build/lib/rules/python/PyRuntimeInfo.java
+++ b/src/main/java/com/google/devtools/build/lib/rules/python/PyRuntimeInfo.java
@@ -56,6 +56,8 @@ public final class PyRuntimeInfo implements Info, PyRuntimeInfoApi<Artifact> {
   @Nullable private final Artifact interpreter;
   // Validated on initialization to contain Artifact
   @Nullable private final Depset files;
+  @Nullable private final Artifact coverageTool;
+  @Nullable private final Depset coverageFiles;
   /** Invariant: either PY2 or PY3. */
   private final PythonVersion pythonVersion;
 
@@ -66,15 +68,20 @@ private PyRuntimeInfo(
       @Nullable PathFragment interpreterPath,
       @Nullable Artifact interpreter,
       @Nullable Depset files,
+      @Nullable Artifact coverageTool,
+      @Nullable Depset coverageFiles,
       PythonVersion pythonVersion,
       @Nullable String stubShebang) {
     Preconditions.checkArgument((interpreterPath == null) != (interpreter == null));
     Preconditions.checkArgument((interpreter == null) == (files == null));
+    Preconditions.checkArgument((coverageTool == null) == (coverageFiles == null));
     Preconditions.checkArgument(pythonVersion.isTargetValue());
     this.location = location != null ? location : Location.BUILTIN;
     this.files = files;
     this.interpreterPath = interpreterPath;
     this.interpreter = interpreter;
+    this.coverageTool = coverageTool;
+    this.coverageFiles = coverageFiles;
     this.pythonVersion = pythonVersion;
     if (stubShebang != null && !stubShebang.isEmpty()) {
       this.stubShebang = stubShebang;
@@ -97,6 +104,8 @@ public Location getCreationLocation() {
   public static PyRuntimeInfo createForInBuildRuntime(
       Artifact interpreter,
       NestedSet<Artifact> files,
+      @Nullable Artifact coverageTool,
+      @Nullable NestedSet<Artifact> coverageFiles,
       PythonVersion pythonVersion,
       @Nullable String stubShebang) {
     return new PyRuntimeInfo(
@@ -104,18 +113,26 @@ public static PyRuntimeInfo createForInBuildRuntime(
         /*interpreterPath=*/ null,
         interpreter,
         Depset.of(Artifact.TYPE, files),
+        coverageTool,
+        coverageFiles == null ? null : Depset.of(Artifact.TYPE, coverageFiles),
         pythonVersion,
         stubShebang);
   }
 
   /** Constructs an instance from native rule logic (built-in location) for a platform runtime. */
   public static PyRuntimeInfo createForPlatformRuntime(
-      PathFragment interpreterPath, PythonVersion pythonVersion, @Nullable String stubShebang) {
+      PathFragment interpreterPath,
+      @Nullable Artifact coverageTool,
+      @Nullable NestedSet<Artifact> coverageFiles,
+      PythonVersion pythonVersion,
+      @Nullable String stubShebang) {
     return new PyRuntimeInfo(
         /*location=*/ null,
         interpreterPath,
         /*interpreter=*/ null,
         /*files=*/ null,
+        coverageTool,
+        coverageFiles == null ? null : Depset.of(Artifact.TYPE, coverageFiles),
         pythonVersion,
         stubShebang);
   }
@@ -131,12 +148,21 @@ public boolean equals(Object other) {
     return (this.interpreterPath.equals(otherInfo.interpreterPath)
         && this.interpreter.equals(otherInfo.interpreter)
         && this.files.equals(otherInfo.files)
+        && this.coverageTool.equals(otherInfo.coverageTool)
+        && this.coverageFiles.equals(otherInfo.coverageFiles)
         && this.stubShebang.equals(otherInfo.stubShebang));
   }
 
   @Override
   public int hashCode() {
-    return Objects.hash(PyRuntimeInfo.class, interpreterPath, interpreter, files, stubShebang);
+    return Objects.hash(
+        PyRuntimeInfo.class,
+        interpreterPath,
+        interpreter,
+        coverageTool,
+        coverageFiles,
+        files,
+        stubShebang);
   }
 
   /**
@@ -191,6 +217,27 @@ public Depset getFilesForStarlark() {
     return files;
   }
 
+  @Override
+  @Nullable
+  public Artifact getCoverageTool() {
+    return coverageTool;
+  }
+
+  @Nullable
+  public NestedSet<Artifact> getCoverageToolFiles() {
+    try {
+      return coverageFiles == null ? null : coverageFiles.getSet(Artifact.class);
+    } catch (Depset.TypeException ex) {
+      throw new IllegalStateException("for coverage_runfiles, " + ex.getMessage());
+    }
+  }
+
+  @Override
+  @Nullable
+  public Depset getCoverageToolFilesForStarlark() {
+    return coverageFiles;
+  }
+
   public PythonVersion getPythonVersion() {
     return pythonVersion;
   }
@@ -213,6 +260,8 @@ public PyRuntimeInfo constructor(
         Object interpreterPathUncast,
         Object interpreterUncast,
         Object filesUncast,
+        Object coverageToolUncast,
+        Object coverageFilesUncast,
         String pythonVersion,
         String stubShebang,
         StarlarkThread thread)
@@ -226,6 +275,13 @@ public PyRuntimeInfo constructor(
         Depset.cast(filesUncast, Artifact.class, "files");
         filesDepset = (Depset) filesUncast;
       }
+      Artifact coverageTool = coverageToolUncast == NONE ? null : (Artifact) coverageToolUncast;
+      Depset coverageDepset = null;
+      if (coverageFilesUncast != NONE) {
+        // Validate type of coverageFilesUncast.
+        Depset.cast(coverageFilesUncast, Artifact.class, "coverage_files");
+        coverageDepset = (Depset) coverageFilesUncast;
+      }
 
       if ((interpreter == null) == (interpreterPath == null)) {
         throw Starlark.errorf(
@@ -253,6 +309,8 @@ public PyRuntimeInfo constructor(
             /*interpreterPath=*/ null,
             interpreter,
             filesDepset,
+            coverageTool,
+            coverageDepset,
             parsedPythonVersion,
             stubShebang);
       } else {
@@ -261,6 +319,8 @@ public PyRuntimeInfo constructor(
             PathFragment.create(interpreterPath),
             /*interpreter=*/ null,
             /*files=*/ null,
+            coverageTool,
+            coverageDepset,
             parsedPythonVersion,
             stubShebang);
       }
diff --git a/src/main/java/com/google/devtools/build/lib/rules/python/PyRuntimeRule.java b/src/main/java/com/google/devtools/build/lib/rules/python/PyRuntimeRule.java
index aceca7b76b412c..1ec90325df7965 100644
--- a/src/main/java/com/google/devtools/build/lib/rules/python/PyRuntimeRule.java
+++ b/src/main/java/com/google/devtools/build/lib/rules/python/PyRuntimeRule.java
@@ -55,6 +55,22 @@ public RuleClass build(RuleClass.Builder builder, RuleDefinitionEnvironment env)
         <!-- #END_BLAZE_RULE.ATTRIBUTE --> */
         .add(attr("interpreter_path", STRING))
 
+        /* <!-- #BLAZE_RULE(py_runtime).ATTRIBUTE(coverage_tool) -->
+        This is a target to use for collecting code coverage information from <code>py_binary</code>
+        and <code>py_test</code> targets.
+
+        <p>If set, the target must either produce a single file or be an executable target.
+        The path to the single file, or the executable if the target is executable,
+        determines the entry point for the python coverage tool.  The target and its
+        runfiles will be added to the runfiles when coverage is enabled.</p>
+
+        <p>The entry point for the tool must be loadable by a python interpreter (e.g. a
+        <code>.py</code> or <code>.pyc</code> file).  It must accept the command line arguments
+        of <a href="https://coverage.readthedocs.io/">coverage.py</a>, at least including
+        the <code>run</code> and <code>lcov</code> subcommands.
+        <!-- #END_BLAZE_RULE.ATTRIBUTE --> */
+        .add(attr("coverage_tool", LABEL).allowedFileTypes(FileTypeSet.NO_FILE))
+
         /* <!-- #BLAZE_RULE(py_runtime).ATTRIBUTE(python_version) -->
         Whether this runtime is for Python major version 2 or 3. Valid values are <code>"PY2"</code>
         and <code>"PY3"</code>.
diff --git a/src/main/java/com/google/devtools/build/lib/starlarkbuildapi/python/PyRuntimeInfoApi.java b/src/main/java/com/google/devtools/build/lib/starlarkbuildapi/python/PyRuntimeInfoApi.java
index 743bb888624bd7..3aa47673d0ea3c 100644
--- a/src/main/java/com/google/devtools/build/lib/starlarkbuildapi/python/PyRuntimeInfoApi.java
+++ b/src/main/java/com/google/devtools/build/lib/starlarkbuildapi/python/PyRuntimeInfoApi.java
@@ -82,6 +82,26 @@ public interface PyRuntimeInfoApi<FileT extends FileApi> extends StarlarkValue {
   @Nullable
   Depset getFilesForStarlark();
 
+  @StarlarkMethod(
+      name = "coverage_tool",
+      structField = true,
+      allowReturnNones = true,
+      doc =
+          "If set, this field is a <code>File</code> representing tool used for collecting code "
+              + "coverage information from python tests. Otherwise, this is <code>None</code>.")
+  @Nullable
+  FileT getCoverageTool();
+
+  @StarlarkMethod(
+      name = "coverage_files",
+      structField = true,
+      allowReturnNones = true,
+      doc =
+          "The files required at runtime for using <code>coverage_tool</code>. "
+              + "Will be <code>None</code> if no <code>coverage_tool</code> was provided.")
+  @Nullable
+  Depset getCoverageToolFilesForStarlark();
+
   @StarlarkMethod(
       name = "python_version",
       structField = true,
@@ -145,6 +165,29 @@ interface PyRuntimeInfoProviderApi extends ProviderApi {
                       + "for this argument if you pass in <code>interpreter_path</code>. If "
                       + "<code>interpreter</code> is given and this argument is <code>None</code>, "
                       + "<code>files</code> becomes an empty <code>depset</code> instead."),
+          @Param(
+              name = "coverage_tool",
+              allowedTypes = {
+                @ParamType(type = FileApi.class),
+                @ParamType(type = NoneType.class),
+              },
+              positional = false,
+              named = true,
+              defaultValue = "None",
+              doc = "The value for the new object's <code>coverage_tool</code> field."),
+          @Param(
+              name = "coverage_files",
+              allowedTypes = {
+                @ParamType(type = Depset.class, generic1 = FileApi.class),
+                @ParamType(type = NoneType.class),
+              },
+              positional = false,
+              named = true,
+              defaultValue = "None",
+              doc =
+                  "The value for the new object's <code>coverage_files</code> field. Do not give a "
+                      + "value for this argument if you do not also pass in "
+                      + "<code>coverage_tool</code>."),
           @Param(
               name = "python_version",
               positional = false,
@@ -169,6 +212,8 @@ PyRuntimeInfoApi<?> constructor(
         Object interpreterPathUncast,
         Object interpreterUncast,
         Object filesUncast,
+        Object coverageToolUncast,
+        Object coverageFilesUncast,
         String pythonVersion,
         String stubShebang,
         StarlarkThread thread)
diff --git a/src/test/java/com/google/devtools/build/lib/rules/python/PyRuntimeInfoTest.java b/src/test/java/com/google/devtools/build/lib/rules/python/PyRuntimeInfoTest.java
index 03a91656267965..3d51090fbb171c 100644
--- a/src/test/java/com/google/devtools/build/lib/rules/python/PyRuntimeInfoTest.java
+++ b/src/test/java/com/google/devtools/build/lib/rules/python/PyRuntimeInfoTest.java
@@ -58,7 +58,8 @@ private static void assertHasOrderAndContainsExactly(
   public void factoryMethod_InBuildRuntime() throws Exception {
     NestedSet<Artifact> files = NestedSetBuilder.create(Order.STABLE_ORDER, dummyFile);
     PyRuntimeInfo inBuildRuntime =
-        PyRuntimeInfo.createForInBuildRuntime(dummyInterpreter, files, PythonVersion.PY2, null);
+        PyRuntimeInfo.createForInBuildRuntime(
+            dummyInterpreter, files, null, null, PythonVersion.PY2, null);
 
     assertThat(inBuildRuntime.getCreationLocation()).isEqualTo(Location.BUILTIN);
     assertThat(inBuildRuntime.getInterpreterPath()).isNull();
@@ -75,7 +76,7 @@ public void factoryMethod_InBuildRuntime() throws Exception {
   public void factoryMethod_PlatformRuntime() {
     PathFragment path = PathFragment.create("/system/interpreter");
     PyRuntimeInfo platformRuntime =
-        PyRuntimeInfo.createForPlatformRuntime(path, PythonVersion.PY2, null);
+        PyRuntimeInfo.createForPlatformRuntime(path, null, null, PythonVersion.PY2, null);
 
     assertThat(platformRuntime.getCreationLocation()).isEqualTo(Location.BUILTIN);
     assertThat(platformRuntime.getInterpreterPath()).isEqualTo(path);
diff --git a/src/test/shell/bazel/BUILD b/src/test/shell/bazel/BUILD
index f34213f95fa813..57b14aba2391b1 100644
--- a/src/test/shell/bazel/BUILD
+++ b/src/test/shell/bazel/BUILD
@@ -523,6 +523,15 @@ sh_test(
     ],
 )
 
+sh_test(
+    name = "bazel_coverage_hermetic_py_test",
+    srcs = ["bazel_coverage_hermetic_py_test.sh"],
+    data = [":test-deps"],
+    tags = [
+        "no_windows",
+    ],
+)
+
 sh_test(
     name = "bazel_coverage_sh_test",
     srcs = ["bazel_coverage_sh_test.sh"],
diff --git a/src/test/shell/bazel/bazel_coverage_hermetic_py_test.sh b/src/test/shell/bazel/bazel_coverage_hermetic_py_test.sh
new file mode 100755
index 00000000000000..0da772c7678769
--- /dev/null
+++ b/src/test/shell/bazel/bazel_coverage_hermetic_py_test.sh
@@ -0,0 +1,174 @@
+#!/bin/bash
+#
+# Copyright 2015 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -eu
+
+# Load the test setup defined in the parent directory
+CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "${CURRENT_DIR}/../integration_test_setup.sh" \
+  || { echo "integration_test_setup.sh not found!" >&2; exit 1; }
+
+# Register the Python toolchain that set_up_py_test_coverage defines in the root BUILD file.
+function set_up() {
+    cat >>WORKSPACE <<EOF
+register_toolchains(
+    "//:python_toolchain",
+)
+EOF
+}
+
+# Returns the path of the code coverage report that was generated by Bazel by
+# looking at the current $TEST_log. The method fails if TEST_log does not
+# contain any coverage report for a passed test.
+function get_coverage_file_path_from_test_log() {
+  # Restrict the search to the log tail that starts at the first PASSED line.
+  local log_tail
+  log_tail="$(sed -n -e '/PASSED/,$p' "$TEST_log")"
+  local coverage_file_path
+  coverage_file_path=$(grep -Eo "/[/a-zA-Z0-9\.\_\-]+\.dat$" <<< "$log_tail")
+  if [[ ! -e "$coverage_file_path" ]]; then fail "Coverage output file does not exist!"; fi
+  echo "$coverage_file_path"
+}
+
+function set_up_py_test_coverage() {
+  # Set up python toolchain.
+  cat <<EOF > BUILD
+load("@bazel_tools//tools/python:toolchain.bzl", "py_runtime_pair")
+
+py_runtime(
+    name = "py3_runtime",
+    coverage_tool = ":mock_coverage",
+    interpreter_path = "$(which python3)",
+    python_version = "PY3",
+)
+
+py_runtime_pair(
+    name = "python_runtimes",
+    py2_runtime = None,
+    py3_runtime = ":py3_runtime",
+)
+
+toolchain(
+    name = "python_toolchain",
+    toolchain = ":python_runtimes",
+    toolchain_type = "@bazel_tools//tools/python:toolchain_type",
+)
+EOF
+  # Add a py_library and test.
+  cat <<EOF >> BUILD
+py_library(
+    name = "hello",
+    srcs = ["hello.py"],
+)
+
+py_library(
+    name = "mock_coverage",
+    srcs = ["mock_coverage.py"],
+    deps = [":coverage_support"],
+)
+
+py_library(
+    name = "coverage_support",
+    srcs = ["coverage_support.py"],
+)
+
+py_test(
+    name = "hello_test",
+    srcs = ["hello_test.py"],
+    deps = [":hello"],
+)
+EOF
+  echo "# fake dependency" > coverage_support.py
+  cat <<EOF > mock_coverage.py
+#!/usr/bin/env python3
+import argparse
+import os
+import subprocess
+import sys
+import coverage_support
+parser = argparse.ArgumentParser()
+mode = sys.argv[1]
+del(sys.argv[1])
+parser.add_argument("--rcfile", type=str)
+parser.add_argument("--append", action="store_true")
+parser.add_argument("--branch", action="store_true")
+parser.add_argument("--output", "-o", type=str)
+parser.add_argument("target", nargs="*")
+args = parser.parse_args()
+tmp_cov_file = os.path.join(os.environ["COVERAGE_DIR"], "tmp.out")
+if mode == "run":
+  subprocess.check_call([sys.executable]+args.target)
+  with open(tmp_cov_file, "a") as tmp:
+    tmp.write("TN:\nSF:")
+    tmp.write(os.path.join(os.path.dirname(os.path.realpath(args.target[0])), "hello.py"))
+    tmp.write("""
+FNF:0
+FNH:0
+DA:1,1,fi+A0ud2xABMExsbhdW38w
+DA:2,1,3qA2I6CcUyJmcd1vpeVcRA
+DA:4,1,nFnrj5CwYCqkvbVhPUFVVw
+DA:5,0,RmWioilSA3bI5NbLlwiuSA
+LH:3
+LF:4
+end_of_record
+""")
+else:
+  with open(args.output, "w") as out_file:
+    with open(tmp_cov_file, "r") as in_file:
+      out_file.write(in_file.read())
+EOF
+  cat <<EOF > hello.py
+def Hello():
+  print("Hello, world!")
+
+def Goodbye():
+  print("Goodbye, world!")
+EOF
+  cat <<EOF > hello_test.py
+import unittest
+import hello
+
+class Tests(unittest.TestCase):
+  def testHello(self):
+    hello.Hello()
+
+if __name__ == "__main__":
+  unittest.main()
+EOF
+  cat <<EOF > expected.dat
+SF:hello.py
+FNF:0
+FNH:0
+DA:1,1,fi+A0ud2xABMExsbhdW38w
+DA:2,1,3qA2I6CcUyJmcd1vpeVcRA
+DA:4,1,nFnrj5CwYCqkvbVhPUFVVw
+DA:5,0,RmWioilSA3bI5NbLlwiuSA
+LH:3
+LF:4
+end_of_record
+EOF
+}
+
+function test_py_test_coverage() {
+  set_up_py_test_coverage
+  bazel coverage --test_output=all //:hello_test &>"$TEST_log" || fail "Coverage for //:hello_test failed"
+  local coverage_file_path
+  coverage_file_path="$( get_coverage_file_path_from_test_log )"
+  # Log the diff for debugging and fail on any difference (diff exits non-zero).
+  diff expected.dat "$coverage_file_path" >> "$TEST_log" || fail "Coverage output file is different than the expected file for py_library."
+}
+
+run_suite "test tests"