Skip to content

Commit

Permalink
Relax check_version_info to check for bytecode compatibility
Browse files Browse the repository at this point in the history
As discussed in #3988, the intention of check_version_info is to ensure
that cloudpickle can transfer objects across the two Python
interpreters. Because cloudpickle serializes Python bytecode, it is
known to be incompatible across minor versions (e.g., 3.11 to 3.12) but
should be compatible across patch versions (e.g., 3.12.0 to 3.12.1).

The intention of the CPython team is that bytecode should be compatible
within patch releases to the same minor version. This was last broken in
3.5.3, which was regarded as a mistake: see the commit message of
python/cpython@93602e3 (in 3.5.10),
which implies that it is reasonable to "rel[y] on pre-cached bytecode
remaining valid across maintenance releases."

The constant importlib.util.MAGIC_NUMBER is used by Python to identify
bytecode compatibility (it is stored in .pyc files and checked when they
are loaded). The unwanted change in 3.5.3 bumped MAGIC_NUMBER, but since
then, it has only changed between minor versions. See the long comment
in CPython's Lib/importlib/_bootstrap_external.py for a history of the
changes.

So, the practical effect of checking MAGIC_NUMBER will generally be to
enforce that nodes run the same minor version of Python, but if some
future patch release unexpectedly does break bytecode compatibility,
checking MAGIC_NUMBER will account for that.

Signed-off-by: Geoffrey Thomas <[email protected]>
  • Loading branch information
geofft committed Nov 24, 2023
1 parent d3ac31e commit 10443bb
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 15 deletions.
9 changes: 6 additions & 3 deletions python/ray/_private/usage/usage_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
To see collected/reported data, see `usage_stats.json` inside a temp
folder (e.g., /tmp/ray/session_[id]/*).
"""
import importlib.util
import json
import logging
import threading
Expand Down Expand Up @@ -349,11 +350,13 @@ def usage_stats_prompt_enabled():
def _generate_cluster_metadata():
"""Return a dictionary of cluster metadata."""
ray_version, python_version = ray._private.utils.compute_version_info()
# These two metadata is necessary although usage report is not enabled
# to check version compatibility.
# These three metadata items are recorded to check version
# compatibility. They are necessary whether or not usage stats are
# enabled.
metadata = {
"ray_version": ray_version,
"python_version": python_version,
"python_magic_number": importlib.util.MAGIC_NUMBER.hex(),
}
# Additional metadata is recorded only when usage stats are enabled.
if usage_stats_enabled():
Expand All @@ -368,7 +371,7 @@ def _generate_cluster_metadata():
}
)
if sys.platform == "linux":
# Record llibc version
# Record libc version
(lib, ver) = platform.libc_ver()
if not lib:
metadata.update({"libc_version": "NA"})
Expand Down
57 changes: 45 additions & 12 deletions python/ray/_private/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
import binascii
from collections import defaultdict
import contextlib
from dataclasses import dataclass
import errno
import functools
import importlib
import importlib.util
import inspect
import json
import logging
Expand Down Expand Up @@ -1492,15 +1494,47 @@ def internal_kv_put_with_retry(gcs_client, key, value, namespace, num_retries=20
raise error


@dataclass
class VersionInfo:
ray_version: str
python_version: str
python_magic_number: Optional[str]

def __str__(self):
if self.python_magic_number is not None:
magic_str = f" (magic number {self.python_magic_number})"
else:
magic_str = ""
return (
f" Ray: {self.ray_version}\n"
+ f" Python: {self.python_version}{magic_str}\n"
)

def __eq__(self, other):
if self.ray_version != other.ray_version:
return False
# If we have the magic number from both sides, use it; if not,
# fall back to exact Python version comparison.
if (
self.python_magic_number is not None
and other.python_magic_number is not None
):
return self.python_magic_number == other.python_magic_number
else:
return self.python_version == other.python_version


def compute_version_info():
"""Compute the versions of Python, and Ray.
Returns:
A tuple containing the version information.
A VersionInfo object for the current interpreter.
"""
ray_version = ray.__version__
python_version = ".".join(map(str, sys.version_info[:3]))
return ray_version, python_version
return VersionInfo(
ray_version=ray.__version__,
python_version=".".join(map(str, sys.version_info[:3])),
python_magic_number=importlib.util.MAGIC_NUMBER.hex(),
)


def get_directory_size_bytes(path: Union[str, Path] = ".") -> int:
Expand All @@ -1524,20 +1558,19 @@ def check_version_info(cluster_metadata):
Raises:
Exception: An exception is raised if there is a version mismatch.
"""
cluster_version_info = (
cluster_metadata["ray_version"],
cluster_metadata["python_version"],
cluster_version_info = VersionInfo(
ray_version=cluster_metadata["ray_version"],
python_version=cluster_metadata["python_version"],
python_magic_number=cluster_metadata.get("python_magic_number"),
)
version_info = compute_version_info()
if version_info != cluster_version_info:
node_ip_address = ray._private.services.get_node_ip_address()
error_message = (
"Version mismatch: The cluster was started with:\n"
" Ray: " + cluster_version_info[0] + "\n"
" Python: " + cluster_version_info[1] + "\n"
"This process on node " + node_ip_address + " was started with:" + "\n"
" Ray: " + version_info[0] + "\n"
" Python: " + version_info[1] + "\n"
+ f"{cluster_version_info}"
+ f"This process on node {node_ip_address} was started with:\n"
+ f"{version_info}"
)
raise RuntimeError(error_message)

Expand Down

0 comments on commit 10443bb

Please sign in to comment.