Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions doc/source/ray-core/handling-dependencies.rst
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ The ``runtime_env`` is a Python dictionary or a Python class :class:`ray.runtime

Note: Setting a local directory per-task or per-actor is currently unsupported; it can only be set per-job (i.e., in ``ray.init()``).

Note: If the local directory contains a ``.gitignore`` file, the files and paths specified there are not uploaded to the cluster. You can disable this by setting the environment variable `RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1` on the machine doing the uploading.
Note: By default, if the local directory contains a ``.gitignore`` and/or ``.rayignore`` file, the files and paths specified in both will not be uploaded to the cluster. To disable the ``.gitignore`` from being considered, set ``RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1`` on the machine doing the uploading.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: In accordance with technical writing style guide:

  • Active tense
  • Avoiding future tense
  • Contractions

Not sure if "Ray" is the right subject in "Ray doesn't upload". Might need to check me on that.

Suggested change
Note: By default, if the local directory contains a ``.gitignore`` and/or ``.rayignore`` file, the files and paths specified in both will not be uploaded to the cluster. To disable the ``.gitignore`` from being considered, set ``RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1`` on the machine doing the uploading.
Note: By default, if the local directory contains a ``.gitignore`` and/or ``.rayignore`` file, Ray doesn't upload the specified files to the cluster. To disable the ``.gitignore`` from being considered, set ``RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1`` on the machine doing the uploading.


Note: If the local directory contains symbolic links, Ray follows the links and the files they point to are uploaded to the cluster.

Expand All @@ -532,7 +532,8 @@ The ``runtime_env`` is a Python dictionary or a Python class :class:`ray.runtime

Note: Setting options (1), (3) and (4) per-task or per-actor is currently unsupported; they can only be set per-job (i.e., in ``ray.init()``).

Note: For option (1), if the local directory contains a ``.gitignore`` file, the files and paths specified there are not uploaded to the cluster. You can disable this by setting the environment variable `RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1` on the machine doing the uploading.
Note: For option (1), by default, if the local directory contains a ``.gitignore`` and/or ``.rayignore`` file, the files and paths specified in both will not be uploaded to the cluster. To disable the ``.gitignore`` from being considered, set ``RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1`` on the machine doing the uploading.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same suggestion here


- ``py_executable`` (str): Specifies the executable used for running the Ray workers. It can include arguments as well. The executable can be
located in the `working_dir`. This runtime environment is useful to run workers in a custom debugger or profiler as well as to run workers
Expand Down
103 changes: 84 additions & 19 deletions python/ray/_private/runtime_env/packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from ray._private.path_utils import is_path
from ray._private.ray_constants import (
GRPC_CPP_MAX_MESSAGE_SIZE,
RAY_RUNTIME_ENV_IGNORE_GITIGNORE,
RAY_RUNTIME_ENV_URI_PIN_EXPIRATION_S_DEFAULT,
RAY_RUNTIME_ENV_URI_PIN_EXPIRATION_S_ENV_VAR,
)
Expand Down Expand Up @@ -86,16 +85,17 @@ def _dir_travel(
path: Path,
excludes: List[Callable],
handler: Callable,
include_gitignore: bool,
logger: Optional[logging.Logger] = default_logger,
):
"""Travels the path recursively, calling the handler on each subpath.

Respects excludes, which will be called to check if this path is skipped.
"""
e = _get_gitignore(path)

if e is not None:
excludes.append(e)
new_excludes = get_excludes_from_ignore_files(
path, include_gitignore=include_gitignore, logger=logger
)
excludes.extend(new_excludes)

skip = any(e(path) for e in excludes)
if not skip:
Expand All @@ -106,9 +106,15 @@ def _dir_travel(
raise e
if path.is_dir():
for sub_path in path.iterdir():
_dir_travel(sub_path, excludes, handler, logger=logger)
_dir_travel(
sub_path,
excludes,
handler,
include_gitignore=include_gitignore,
logger=logger,
)

if e is not None:
for _ in range(len(new_excludes)):
excludes.pop()


Expand Down Expand Up @@ -166,6 +172,7 @@ def _hash_directory(
root: Path,
relative_path: Path,
excludes: Optional[Callable],
include_gitignore: bool,
logger: Optional[logging.Logger] = default_logger,
) -> bytes:
"""Helper function to create hash of a directory.
Expand All @@ -183,7 +190,9 @@ def handler(path: Path):
hash_val = _xor_bytes(hash_val, file_hash)

excludes = [] if excludes is None else [excludes]
_dir_travel(root, excludes, handler, logger=logger)
_dir_travel(
root, excludes, handler, include_gitignore=include_gitignore, logger=logger
)
return hash_val


Expand Down Expand Up @@ -280,24 +289,21 @@ def match(p: Path):
return match


def _get_gitignore(path: Path) -> Optional[Callable]:
def _get_ignore_file(path: Path, ignore_file: str) -> Optional[Callable]:
"""Returns a function that returns True if the path should be excluded.

Returns None if there is no .gitignore file in the path, or if the
RAY_RUNTIME_ENV_IGNORE_GITIGNORE environment variable is set to 1.
Returns None if there is no ignore_file in the path.

Args:
path: The path to the directory to check for a .gitignore file.
path: The path to the directory to check for an ignore file.
ignore_file: The name of the ignore file.

Returns:
A function that returns True if the path should be excluded.
"""
ignore_gitignore = os.environ.get(RAY_RUNTIME_ENV_IGNORE_GITIGNORE, "0") == "1"
if ignore_gitignore:
return None

path = path.absolute()
ignore_file = path / ".gitignore"
ignore_file = path / ignore_file
if ignore_file.is_file():
with ignore_file.open("r") as f:
pathspec = PathSpec.from_lines("gitwildmatch", f.readlines())
Expand All @@ -311,6 +317,43 @@ def match(p: Path):
return None


def get_excludes_from_ignore_files(
    path: Path,
    include_gitignore: bool,
    logger: Optional[logging.Logger] = default_logger,
) -> List[Callable]:
    """Get exclusion functions from the ignore files located directly in ``path``.

    ``.rayignore`` is always consulted; ``.gitignore`` is consulted only when
    ``include_gitignore`` is True. An ignore file that does not exist in
    ``path`` contributes nothing to the result.

    Args:
        path: The directory to check for ignore files.
        include_gitignore: Whether to respect .gitignore files.
        logger: Logger used to report which ignore files were found. If None,
            nothing is logged.

    Returns:
        List of exclusion functions, ``.gitignore`` first (when enabled and
        present), then ``.rayignore``. Each function takes a Path and returns
        True if the path should be excluded based on the patterns in the
        respective ignore file.
    """
    # Keep .gitignore ahead of .rayignore so the order of the returned
    # matchers is stable for callers that push/pop them as a group.
    ignore_file_names = [".rayignore"]
    if include_gitignore:
        ignore_file_names.insert(0, ".gitignore")

    to_ignore: List[Callable] = []
    ignore_files: List[Path] = []
    for name in ignore_file_names:
        matcher = _get_ignore_file(path, ignore_file=name)
        if matcher is not None:
            to_ignore.append(matcher)
            ignore_files.append(path / name)

    # The message lists the ignore files that were picked up, not the
    # individual paths they exclude. ``logger`` is declared Optional, so guard
    # against None instead of crashing on attribute access.
    if ignore_files and logger is not None:
        logger.info(f"Ignoring upload to cluster for these files: {ignore_files}")

    return to_ignore


def pin_runtime_env_uri(uri: str, *, expiration_s: Optional[int] = None) -> None:
"""Pin a reference to a runtime_env URI in the GCS on a timeout.

Expand Down Expand Up @@ -403,6 +446,7 @@ def _zip_files(
path_str: str,
excludes: List[str],
output_path: str,
include_gitignore: bool,
include_parent_dir: bool = False,
logger: Optional[logging.Logger] = default_logger,
) -> None:
Expand Down Expand Up @@ -440,7 +484,13 @@ def handler(path: Path):
zip_handler.write(path, to_path)

excludes = [_get_excludes(file_path, excludes)]
_dir_travel(file_path, excludes, handler, logger=logger)
_dir_travel(
file_path,
excludes,
handler,
include_gitignore=include_gitignore,
logger=logger,
)


def package_exists(pkg_uri: str) -> bool:
Expand Down Expand Up @@ -508,7 +558,11 @@ def get_uri_for_file(file: str) -> str:
)


def get_uri_for_directory(directory: str, excludes: Optional[List[str]] = None) -> str:
def get_uri_for_directory(
directory: str,
include_gitignore: bool,
excludes: Optional[List[str]] = None,
) -> str:
"""Get a content-addressable URI from a directory's contents.

This function generates the name of the package by the directory.
Expand All @@ -524,6 +578,7 @@ def get_uri_for_directory(directory: str, excludes: Optional[List[str]] = None)

Args:
directory: The directory.
include_gitignore: Whether to respect .gitignore files.
excludes (list[str]): The dir or files that should be excluded.

Returns:
Expand All @@ -539,7 +594,12 @@ def get_uri_for_directory(directory: str, excludes: Optional[List[str]] = None)
if not directory.exists() or not directory.is_dir():
raise ValueError(f"directory {directory} must be an existing directory")

hash_val = _hash_directory(directory, directory, _get_excludes(directory, excludes))
hash_val = _hash_directory(
directory,
directory,
_get_excludes(directory, excludes),
include_gitignore=include_gitignore,
)

return "{protocol}://{pkg_name}.zip".format(
protocol=Protocol.GCS.value, pkg_name=RAY_PKG_PREFIX + hash_val.hex()
Expand Down Expand Up @@ -574,6 +634,7 @@ def upload_package_to_gcs(pkg_uri: str, pkg_bytes: bytes) -> None:
def create_package(
module_path: str,
target_path: Path,
include_gitignore: bool,
include_parent_dir: bool = False,
excludes: Optional[List[str]] = None,
logger: Optional[logging.Logger] = default_logger,
Expand All @@ -590,6 +651,7 @@ def create_package(
module_path,
excludes,
str(target_path),
include_gitignore=include_gitignore,
include_parent_dir=include_parent_dir,
logger=logger,
)
Expand All @@ -599,6 +661,7 @@ def upload_package_if_needed(
pkg_uri: str,
base_directory: str,
module_path: str,
include_gitignore: bool,
include_parent_dir: bool = False,
excludes: Optional[List[str]] = None,
logger: Optional[logging.Logger] = default_logger,
Expand All @@ -617,6 +680,7 @@ def upload_package_if_needed(
include_parent_dir: If true, includes the top-level directory as a
directory inside the zip file.
excludes: List specifying files to exclude.
include_gitignore: Whether to respect .gitignore files.

Raises:
RuntimeError: If the upload fails.
Expand Down Expand Up @@ -646,6 +710,7 @@ def upload_package_if_needed(
create_package(
module_path,
package_file,
include_gitignore=include_gitignore,
include_parent_dir=include_parent_dir,
excludes=excludes,
)
Expand Down
10 changes: 8 additions & 2 deletions python/ray/_private/runtime_env/py_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def _check_is_uri(s: str) -> bool:

def upload_py_modules_if_needed(
runtime_env: Dict[str, Any],
include_gitignore: bool,
scratch_dir: Optional[str] = os.getcwd(),
logger: Optional[logging.Logger] = default_logger,
upload_fn=None,
Expand Down Expand Up @@ -102,7 +103,11 @@ def upload_py_modules_if_needed(
is_dir = Path(module_path).is_dir()
excludes = runtime_env.get("excludes", None)
if is_dir:
module_uri = get_uri_for_directory(module_path, excludes=excludes)
module_uri = get_uri_for_directory(
module_path,
include_gitignore=include_gitignore,
excludes=excludes,
)
else:
module_uri = get_uri_for_file(module_path)
if upload_fn is None:
Expand All @@ -111,8 +116,9 @@ def upload_py_modules_if_needed(
module_uri,
scratch_dir,
module_path,
excludes=excludes,
include_gitignore=include_gitignore,
include_parent_dir=is_dir,
excludes=excludes,
logger=logger,
)
except Exception as e:
Expand Down
8 changes: 7 additions & 1 deletion python/ray/_private/runtime_env/working_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

def upload_working_dir_if_needed(
runtime_env: Dict[str, Any],
include_gitignore: bool,
scratch_dir: Optional[str] = os.getcwd(),
logger: Optional[logging.Logger] = default_logger,
upload_fn: Optional[Callable[[str, Optional[List[str]]], None]] = None,
Expand Down Expand Up @@ -64,7 +65,11 @@ def upload_working_dir_if_needed(

excludes = runtime_env.get("excludes", None)
try:
working_dir_uri = get_uri_for_directory(working_dir, excludes=excludes)
working_dir_uri = get_uri_for_directory(
working_dir,
include_gitignore=include_gitignore,
excludes=excludes,
)
except ValueError: # working_dir is not a directory
package_path = Path(working_dir)
if not package_path.exists() or package_path.suffix != ".zip":
Expand All @@ -90,6 +95,7 @@ def upload_working_dir_if_needed(
working_dir,
include_parent_dir=False,
excludes=excludes,
include_gitignore=include_gitignore,
logger=logger,
)
except Exception as e:
Expand Down
15 changes: 13 additions & 2 deletions python/ray/_private/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2610,13 +2610,24 @@ def connect(
# environment here. If it's ray client, the environment will be prepared
# at the server side.
if mode == SCRIPT_MODE and not job_config._client_job and job_config.runtime_env:
from ray._private.ray_constants import RAY_RUNTIME_ENV_IGNORE_GITIGNORE

scratch_dir: str = worker.node.get_runtime_env_dir_path()
runtime_env = job_config.runtime_env or {}
# Determine whether to respect .gitignore files based on environment variable
# Default is True (respect .gitignore). Set to False if env var is "1".
include_gitignore = os.environ.get(RAY_RUNTIME_ENV_IGNORE_GITIGNORE, "0") != "1"
runtime_env = upload_py_modules_if_needed(
runtime_env, scratch_dir, logger=logger
runtime_env,
include_gitignore=include_gitignore,
scratch_dir=scratch_dir,
logger=logger,
)
runtime_env = upload_working_dir_if_needed(
runtime_env, scratch_dir, logger=logger
runtime_env,
include_gitignore=include_gitignore,
scratch_dir=scratch_dir,
logger=logger,
)
runtime_env = upload_worker_process_setup_hook_if_needed(
runtime_env,
Expand Down
Loading