Skip to content

Commit a8707ad

Browse files
authored
[core] Add .rayignore (#58500)
## Description ### Status Quo Previously, `.gitignore` files controlled both what was uploaded to the cluster _and_ what was committed to GitHub. This PR separates those two functions by introducing a `.rayignore` file, which controls only what is uploaded to the cluster. ### Purpose Any path or file specified in `.rayignore` will be ignored when uploading to the cluster. This is useful for local development when you don't want random files being uploaded and taking up space. ### How it works By default, if a directory contains both a `.gitignore` and a `.rayignore`, both files are considered (so existing behavior is preserved). To make `.gitignore` only ignore files uploaded to GitHub, and `.rayignore` only ignore files uploaded to the cluster (essentially making them independent of each other), you can use the existing `RAY_RUNTIME_ENV_IGNORE_GITIGNORE` environment variable and set it to `1`. ## Related issues #53648 ## Additional information Since `.rayignore` is part of the Ray ecosystem, I did not create an env var to disable ignoring altogether. If users do not want to ignore files, they can leave `.rayignore` empty, or not create the file at all. --------- Signed-off-by: iamjustinhsu <[email protected]>
1 parent d972b7d commit a8707ad

File tree

12 files changed

+261
-68
lines changed

12 files changed

+261
-68
lines changed

doc/source/ray-core/handling-dependencies.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,7 @@ The ``runtime_env`` is a Python dictionary or a Python class :class:`ray.runtime
505505

506506
Note: Setting a local directory per-task or per-actor is currently unsupported; it can only be set per-job (i.e., in ``ray.init()``).
507507

508-
Note: If the local directory contains a ``.gitignore`` file, the files and paths specified there are not uploaded to the cluster. You can disable this by setting the environment variable `RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1` on the machine doing the uploading.
508+
Note: By default, if the local directory contains a ``.gitignore`` and/or ``.rayignore`` file, the files and paths specified there are not uploaded to the cluster. To stop ``.gitignore`` files from being considered, set ``RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1`` on the machine doing the uploading.
509509

510510
Note: If the local directory contains symbolic links, Ray follows the links and the files they point to are uploaded to the cluster.
511511

@@ -532,7 +532,8 @@ The ``runtime_env`` is a Python dictionary or a Python class :class:`ray.runtime
532532

533533
Note: Setting options (1), (3) and (4) per-task or per-actor is currently unsupported, it can only be set per-job (i.e., in ``ray.init()``).
534534

535-
Note: For option (1), if the local directory contains a ``.gitignore`` file, the files and paths specified there are not uploaded to the cluster. You can disable this by setting the environment variable `RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1` on the machine doing the uploading.
535+
Note: For option (1), by default, if the local directory contains a ``.gitignore`` and/or ``.rayignore`` file, the files and paths specified there are not uploaded to the cluster. To stop ``.gitignore`` files from being considered, set ``RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1`` on the machine doing the uploading.
536+
536537

537538
- ``py_executable`` (str): Specifies the executable used for running the Ray workers. It can include arguments as well. The executable can be
538539
located in the `working_dir`. This runtime environment is useful to run workers in a custom debugger or profiler as well as to run workers

python/ray/_private/runtime_env/packaging.py

Lines changed: 84 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from ray._private.path_utils import is_path
1616
from ray._private.ray_constants import (
1717
GRPC_CPP_MAX_MESSAGE_SIZE,
18-
RAY_RUNTIME_ENV_IGNORE_GITIGNORE,
1918
RAY_RUNTIME_ENV_URI_PIN_EXPIRATION_S_DEFAULT,
2019
RAY_RUNTIME_ENV_URI_PIN_EXPIRATION_S_ENV_VAR,
2120
)
@@ -86,16 +85,17 @@ def _dir_travel(
8685
path: Path,
8786
excludes: List[Callable],
8887
handler: Callable,
88+
include_gitignore: bool,
8989
logger: Optional[logging.Logger] = default_logger,
9090
):
9191
"""Travels the path recursively, calling the handler on each subpath.
9292
9393
Respects excludes, which will be called to check if this path is skipped.
9494
"""
95-
e = _get_gitignore(path)
96-
97-
if e is not None:
98-
excludes.append(e)
95+
new_excludes = get_excludes_from_ignore_files(
96+
path, include_gitignore=include_gitignore, logger=logger
97+
)
98+
excludes.extend(new_excludes)
9999

100100
skip = any(e(path) for e in excludes)
101101
if not skip:
@@ -106,9 +106,15 @@ def _dir_travel(
106106
raise e
107107
if path.is_dir():
108108
for sub_path in path.iterdir():
109-
_dir_travel(sub_path, excludes, handler, logger=logger)
109+
_dir_travel(
110+
sub_path,
111+
excludes,
112+
handler,
113+
include_gitignore=include_gitignore,
114+
logger=logger,
115+
)
110116

111-
if e is not None:
117+
for _ in range(len(new_excludes)):
112118
excludes.pop()
113119

114120

@@ -166,6 +172,7 @@ def _hash_directory(
166172
root: Path,
167173
relative_path: Path,
168174
excludes: Optional[Callable],
175+
include_gitignore: bool,
169176
logger: Optional[logging.Logger] = default_logger,
170177
) -> bytes:
171178
"""Helper function to create hash of a directory.
@@ -183,7 +190,9 @@ def handler(path: Path):
183190
hash_val = _xor_bytes(hash_val, file_hash)
184191

185192
excludes = [] if excludes is None else [excludes]
186-
_dir_travel(root, excludes, handler, logger=logger)
193+
_dir_travel(
194+
root, excludes, handler, include_gitignore=include_gitignore, logger=logger
195+
)
187196
return hash_val
188197

189198

@@ -280,24 +289,21 @@ def match(p: Path):
280289
return match
281290

282291

283-
def _get_gitignore(path: Path) -> Optional[Callable]:
292+
def _get_ignore_file(path: Path, ignore_file: str) -> Optional[Callable]:
284293
"""Returns a function that returns True if the path should be excluded.
285294
286-
Returns None if there is no .gitignore file in the path, or if the
287-
RAY_RUNTIME_ENV_IGNORE_GITIGNORE environment variable is set to 1.
295+
Returns None if there is no ignore_file in the path.
288296
289297
Args:
290-
path: The path to the directory to check for a .gitignore file.
298+
path: The path to the directory to check for an ignore file.
299+
ignore_file: The name of the ignore file.
291300
292301
Returns:
293302
A function that returns True if the path should be excluded.
294303
"""
295-
ignore_gitignore = os.environ.get(RAY_RUNTIME_ENV_IGNORE_GITIGNORE, "0") == "1"
296-
if ignore_gitignore:
297-
return None
298304

299305
path = path.absolute()
300-
ignore_file = path / ".gitignore"
306+
ignore_file = path / ignore_file
301307
if ignore_file.is_file():
302308
with ignore_file.open("r") as f:
303309
pathspec = PathSpec.from_lines("gitwildmatch", f.readlines())
@@ -311,6 +317,43 @@ def match(p: Path):
311317
return None
312318

313319

320+
def get_excludes_from_ignore_files(
321+
path: Path,
322+
include_gitignore: bool,
323+
logger: Optional[logging.Logger] = default_logger,
324+
) -> List[Callable]:
325+
"""Get exclusion functions from .gitignore and .rayignore files in the current path.
326+
327+
Args:
328+
path: The path to check for ignore files.
329+
include_gitignore: Whether to respect .gitignore files.
330+
logger: Logger to use.
331+
332+
Returns:
333+
List[Callable]: List of exclusion functions. Each function takes a Path
334+
and returns True if the path should be excluded based on the ignore
335+
patterns in the respective ignore file.
336+
"""
337+
ignore_files = []
338+
339+
to_ignore: List[Optional[Callable]] = []
340+
if include_gitignore:
341+
g = _get_ignore_file(path, ignore_file=".gitignore")
342+
if g is not None:
343+
to_ignore.append(g)
344+
ignore_files.append(path / ".gitignore")
345+
346+
r = _get_ignore_file(path, ignore_file=".rayignore")
347+
if r is not None:
348+
to_ignore.append(r)
349+
ignore_files.append(path / ".rayignore")
350+
351+
if ignore_files:
352+
logger.info(f"Ignoring upload to cluster for these files: {ignore_files}")
353+
354+
return to_ignore
355+
356+
314357
def pin_runtime_env_uri(uri: str, *, expiration_s: Optional[int] = None) -> None:
315358
"""Pin a reference to a runtime_env URI in the GCS on a timeout.
316359
@@ -403,6 +446,7 @@ def _zip_files(
403446
path_str: str,
404447
excludes: List[str],
405448
output_path: str,
449+
include_gitignore: bool,
406450
include_parent_dir: bool = False,
407451
logger: Optional[logging.Logger] = default_logger,
408452
) -> None:
@@ -440,7 +484,13 @@ def handler(path: Path):
440484
zip_handler.write(path, to_path)
441485

442486
excludes = [_get_excludes(file_path, excludes)]
443-
_dir_travel(file_path, excludes, handler, logger=logger)
487+
_dir_travel(
488+
file_path,
489+
excludes,
490+
handler,
491+
include_gitignore=include_gitignore,
492+
logger=logger,
493+
)
444494

445495

446496
def package_exists(pkg_uri: str) -> bool:
@@ -508,7 +558,11 @@ def get_uri_for_file(file: str) -> str:
508558
)
509559

510560

511-
def get_uri_for_directory(directory: str, excludes: Optional[List[str]] = None) -> str:
561+
def get_uri_for_directory(
562+
directory: str,
563+
include_gitignore: bool,
564+
excludes: Optional[List[str]] = None,
565+
) -> str:
512566
"""Get a content-addressable URI from a directory's contents.
513567
514568
This function generates the name of the package by the directory.
@@ -524,6 +578,7 @@ def get_uri_for_directory(directory: str, excludes: Optional[List[str]] = None)
524578
525579
Args:
526580
directory: The directory.
581+
include_gitignore: Whether to respect .gitignore files.
527582
excludes (list[str]): The dir or files that should be excluded.
528583
529584
Returns:
@@ -539,7 +594,12 @@ def get_uri_for_directory(directory: str, excludes: Optional[List[str]] = None)
539594
if not directory.exists() or not directory.is_dir():
540595
raise ValueError(f"directory {directory} must be an existing directory")
541596

542-
hash_val = _hash_directory(directory, directory, _get_excludes(directory, excludes))
597+
hash_val = _hash_directory(
598+
directory,
599+
directory,
600+
_get_excludes(directory, excludes),
601+
include_gitignore=include_gitignore,
602+
)
543603

544604
return "{protocol}://{pkg_name}.zip".format(
545605
protocol=Protocol.GCS.value, pkg_name=RAY_PKG_PREFIX + hash_val.hex()
@@ -574,6 +634,7 @@ def upload_package_to_gcs(pkg_uri: str, pkg_bytes: bytes) -> None:
574634
def create_package(
575635
module_path: str,
576636
target_path: Path,
637+
include_gitignore: bool,
577638
include_parent_dir: bool = False,
578639
excludes: Optional[List[str]] = None,
579640
logger: Optional[logging.Logger] = default_logger,
@@ -590,6 +651,7 @@ def create_package(
590651
module_path,
591652
excludes,
592653
str(target_path),
654+
include_gitignore=include_gitignore,
593655
include_parent_dir=include_parent_dir,
594656
logger=logger,
595657
)
@@ -599,6 +661,7 @@ def upload_package_if_needed(
599661
pkg_uri: str,
600662
base_directory: str,
601663
module_path: str,
664+
include_gitignore: bool,
602665
include_parent_dir: bool = False,
603666
excludes: Optional[List[str]] = None,
604667
logger: Optional[logging.Logger] = default_logger,
@@ -617,6 +680,7 @@ def upload_package_if_needed(
617680
include_parent_dir: If true, includes the top-level directory as a
618681
directory inside the zip file.
619682
excludes: List specifying files to exclude.
683+
include_gitignore: Whether to respect .gitignore files. Default is True.
620684
621685
Raises:
622686
RuntimeError: If the upload fails.
@@ -646,6 +710,7 @@ def upload_package_if_needed(
646710
create_package(
647711
module_path,
648712
package_file,
713+
include_gitignore=include_gitignore,
649714
include_parent_dir=include_parent_dir,
650715
excludes=excludes,
651716
)

python/ray/_private/runtime_env/py_modules.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def _check_is_uri(s: str) -> bool:
4848

4949
def upload_py_modules_if_needed(
5050
runtime_env: Dict[str, Any],
51+
include_gitignore: bool,
5152
scratch_dir: Optional[str] = os.getcwd(),
5253
logger: Optional[logging.Logger] = default_logger,
5354
upload_fn=None,
@@ -102,7 +103,11 @@ def upload_py_modules_if_needed(
102103
is_dir = Path(module_path).is_dir()
103104
excludes = runtime_env.get("excludes", None)
104105
if is_dir:
105-
module_uri = get_uri_for_directory(module_path, excludes=excludes)
106+
module_uri = get_uri_for_directory(
107+
module_path,
108+
include_gitignore=include_gitignore,
109+
excludes=excludes,
110+
)
106111
else:
107112
module_uri = get_uri_for_file(module_path)
108113
if upload_fn is None:
@@ -111,8 +116,9 @@ def upload_py_modules_if_needed(
111116
module_uri,
112117
scratch_dir,
113118
module_path,
114-
excludes=excludes,
119+
include_gitignore=include_gitignore,
115120
include_parent_dir=is_dir,
121+
excludes=excludes,
116122
logger=logger,
117123
)
118124
except Exception as e:

python/ray/_private/runtime_env/working_dir.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
def upload_working_dir_if_needed(
3232
runtime_env: Dict[str, Any],
33+
include_gitignore: bool,
3334
scratch_dir: Optional[str] = os.getcwd(),
3435
logger: Optional[logging.Logger] = default_logger,
3536
upload_fn: Optional[Callable[[str, Optional[List[str]]], None]] = None,
@@ -64,7 +65,11 @@ def upload_working_dir_if_needed(
6465

6566
excludes = runtime_env.get("excludes", None)
6667
try:
67-
working_dir_uri = get_uri_for_directory(working_dir, excludes=excludes)
68+
working_dir_uri = get_uri_for_directory(
69+
working_dir,
70+
include_gitignore=include_gitignore,
71+
excludes=excludes,
72+
)
6873
except ValueError: # working_dir is not a directory
6974
package_path = Path(working_dir)
7075
if not package_path.exists() or package_path.suffix != ".zip":
@@ -90,6 +95,7 @@ def upload_working_dir_if_needed(
9095
working_dir,
9196
include_parent_dir=False,
9297
excludes=excludes,
98+
include_gitignore=include_gitignore,
9399
logger=logger,
94100
)
95101
except Exception as e:

python/ray/_private/worker.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2610,13 +2610,24 @@ def connect(
26102610
# environment here. If it's ray client, the environment will be prepared
26112611
# at the server side.
26122612
if mode == SCRIPT_MODE and not job_config._client_job and job_config.runtime_env:
2613+
from ray._private.ray_constants import RAY_RUNTIME_ENV_IGNORE_GITIGNORE
2614+
26132615
scratch_dir: str = worker.node.get_runtime_env_dir_path()
26142616
runtime_env = job_config.runtime_env or {}
2617+
# Determine whether to respect .gitignore files based on environment variable
2618+
# Default is True (respect .gitignore). Set to False if env var is "1".
2619+
include_gitignore = os.environ.get(RAY_RUNTIME_ENV_IGNORE_GITIGNORE, "0") != "1"
26152620
runtime_env = upload_py_modules_if_needed(
2616-
runtime_env, scratch_dir, logger=logger
2621+
runtime_env,
2622+
include_gitignore=include_gitignore,
2623+
scratch_dir=scratch_dir,
2624+
logger=logger,
26172625
)
26182626
runtime_env = upload_working_dir_if_needed(
2619-
runtime_env, scratch_dir, logger=logger
2627+
runtime_env,
2628+
include_gitignore=include_gitignore,
2629+
scratch_dir=scratch_dir,
2630+
logger=logger,
26202631
)
26212632
runtime_env = upload_worker_process_setup_hook_if_needed(
26222633
runtime_env,

0 commit comments

Comments
 (0)