Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions doc/source/ray-core/handling-dependencies.rst
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ The ``runtime_env`` is a Python dictionary or a Python class :class:`ray.runtime

Note: Setting a local directory per-task or per-actor is currently unsupported; it can only be set per-job (i.e., in ``ray.init()``).

Note: If the local directory contains a ``.gitignore`` file, the files and paths specified there are not uploaded to the cluster. You can disable this by setting the environment variable `RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1` on the machine doing the uploading.
Note: By default, if the local directory contains a ``.gitignore`` and/or ``.rayignore`` file, the files and paths specified in both will not be uploaded to the cluster. To disable the ``.gitignore`` from being considered, set ``RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1`` on the machine doing the uploading.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: In accordance with technical writing style guide:

  • Active voice
  • Avoiding future tense
  • Contractions

Not sure if "Ray" is the right subject in "Ray doesn't upload". Might need to check me on that.

Suggested change
Note: By default, if the local directory contains a ``.gitignore`` and/or ``.rayignore`` file, the files and paths specified in both will not be uploaded to the cluster. To disable the ``.gitignore`` from being considered, set ``RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1`` on the machine doing the uploading.
Note: By default, if the local directory contains a ``.gitignore`` and/or ``.rayignore`` file, Ray doesn't upload the specified files to the cluster. To disable the ``.gitignore`` from being considered, set ``RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1`` on the machine doing the uploading.


Note: If the local directory contains symbolic links, Ray follows the links and the files they point to are uploaded to the cluster.

Expand All @@ -532,7 +532,8 @@ The ``runtime_env`` is a Python dictionary or a Python class :class:`ray.runtime

Note: Setting options (1), (3) and (4) per-task or per-actor is currently unsupported; it can only be set per-job (i.e., in ``ray.init()``).

Note: For option (1), if the local directory contains a ``.gitignore`` file, the files and paths specified there are not uploaded to the cluster. You can disable this by setting the environment variable `RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1` on the machine doing the uploading.
Note: For option (1), by default, if the local directory contains a ``.gitignore`` and/or ``.rayignore`` file, the files and paths specified in both will not be uploaded to the cluster. To disable the ``.gitignore`` from being considered, set ``RAY_RUNTIME_ENV_IGNORE_GITIGNORE=1`` on the machine doing the uploading.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same suggestion here


- ``py_executable`` (str): Specifies the executable used for running the Ray workers. It can include arguments as well. The executable can be
located in the `working_dir`. This runtime environment is useful to run workers in a custom debugger or profiler as well as to run workers
Expand Down
52 changes: 40 additions & 12 deletions python/ray/_private/runtime_env/packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,9 @@ def _dir_travel(

Respects excludes, which will be called to check if this path is skipped.
"""
e = _get_gitignore(path)

if e is not None:
excludes.append(e)
new_excludes = get_excludes_from_ignore_files(path, logger=logger)
excludes.extend(new_excludes)

skip = any(e(path) for e in excludes)
if not skip:
Expand All @@ -108,7 +107,7 @@ def _dir_travel(
for sub_path in path.iterdir():
_dir_travel(sub_path, excludes, handler, logger=logger)

if e is not None:
for _ in range(len(new_excludes)):
excludes.pop()


Expand Down Expand Up @@ -280,24 +279,22 @@ def match(p: Path):
return match


def _get_gitignore(path: Path) -> Optional[Callable]:
def _get_ignore_file(path: Path, ignore_file: str) -> Optional[Callable]:
"""Returns a function that returns True if the path should be excluded.

Returns None if there is no .gitignore file in the path, or if the
RAY_RUNTIME_ENV_IGNORE_GITIGNORE environment variable is set to 1.
Returns None if the given ignore file does not exist in the path.

Args:
path: The path to the directory to check for a .gitignore file.
path: The path to the directory to check for the ignore file.
ignore_file: The name of the ignore file. Combined with path will
give the absolute path to the ignore_file.

Returns:
A function that returns True if the path should be excluded.
"""
ignore_gitignore = os.environ.get(RAY_RUNTIME_ENV_IGNORE_GITIGNORE, "0") == "1"
if ignore_gitignore:
return None

path = path.absolute()
ignore_file = path / ".gitignore"
ignore_file = path / ignore_file
if ignore_file.is_file():
with ignore_file.open("r") as f:
pathspec = PathSpec.from_lines("gitwildmatch", f.readlines())
Expand All @@ -311,6 +308,37 @@ def match(p: Path):
return None


def get_excludes_from_ignore_files(
path: Path, logger: Optional[logging.Logger] = default_logger
) -> List[Callable]:
"""Get exclusion functions from .gitignore and .rayignore files in the current path.

Environment Variables:
RAY_RUNTIME_ENV_IGNORE_GITIGNORE: If set to "1", .gitignore files
won't be parsed. Default is "0" (parse .gitignore).

Returns:
List[Callable]: List of exclusion functions. Each function takes a Path
and returns True if the path should be excluded based on the ignore
patterns in the respective ignore file.
"""
ignore_gitignore = os.environ.get(RAY_RUNTIME_ENV_IGNORE_GITIGNORE, "0") == "1"
Copy link
Collaborator

@edoakes edoakes Nov 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I generally prefer to try to follow a more functional style and keep external dependencies (like env vars) higher in the call stack. in this case, that means adding an explicit, required argument to this function, like include_gitignore and then populating that argument based on the environment variable at the callsite.

this makes it easier to test, makes the code more self-documenting, and reduces "surprise" for future developers, who may not realize that something deep in the call stack is controlled by an environment variable


to_ignore: List[Optional[Callable]] = []
if not ignore_gitignore:
# Default behavior: .gitignore is honored in addition to .rayignore.
# Each file is parsed independently; .rayignore is always parsed below.
g = _get_ignore_file(path, ignore_file=".gitignore")
to_ignore.append(g)
logger.info(
"Ignoring files found in .rayignore (if exists) and .gitignore (if exists)"
)

r = _get_ignore_file(path, ignore_file=".rayignore")
to_ignore.append(r)
return [ignore for ignore in to_ignore if ignore is not None]


def pin_runtime_env_uri(uri: str, *, expiration_s: Optional[int] = None) -> None:
"""Pin a reference to a runtime_env URI in the GCS on a timeout.

Expand Down
52 changes: 49 additions & 3 deletions python/ray/tests/test_runtime_env_packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@
Protocol,
_dir_travel,
_get_excludes,
_get_gitignore,
_get_ignore_file,
_store_package_in_gcs,
download_and_unpack_package,
get_excludes_from_ignore_files,
get_local_dir_from_uri,
get_top_level_dir_from_compressed_package,
get_uri_for_directory,
Expand Down Expand Up @@ -904,8 +905,53 @@ async def test_download_and_unpack_package_with_invalid_uri(
def test_get_gitignore(tmp_path):
gitignore_path = tmp_path / ".gitignore"
gitignore_path.write_text("*.pyc")
assert _get_gitignore(tmp_path)(Path(tmp_path / "foo.pyc")) is True
assert _get_gitignore(tmp_path)(Path(tmp_path / "foo.py")) is False
gitignore_func = _get_ignore_file(tmp_path, ".gitignore")
assert gitignore_func(Path(tmp_path / "foo.pyc")) is True
assert gitignore_func(Path(tmp_path / "foo.py")) is False


@pytest.mark.parametrize(
    "ignore_gitignore,expected_excludes",
    [
        # Env var "0": both ignore files contribute an exclusion function.
        ("0", ["gitignore", "rayignore"]),
        # Env var "1": .gitignore is skipped, so only .rayignore applies.
        ("1", ["rayignore"]),
    ],
)
def test_ray_ignore_and_git_ignore_together(
    tmp_path, ignore_gitignore, expected_excludes, monkeypatch
):
    """Check which ignore files get_excludes_from_ignore_files honors.

    The RAY_RUNTIME_ENV_IGNORE_GITIGNORE environment variable is set to the
    parametrized value, a .gitignore and a .rayignore are written into
    tmp_path, and the returned exclusion functions are checked against one
    path matching each file's pattern.
    """
    monkeypatch.setenv(RAY_RUNTIME_ENV_IGNORE_GITIGNORE, ignore_gitignore)

    # One pattern per ignore file, plus a candidate path matching each pattern.
    (tmp_path / ".gitignore").write_text("*.pyc")
    (tmp_path / ".rayignore").write_text("*.cache")
    pyc_candidate = tmp_path / "test.pyc"
    cache_candidate = tmp_path / "test.cache"

    excluders = get_excludes_from_ignore_files(tmp_path)

    # One exclusion function is expected per active ignore file.
    assert len(excluders) == len(
        expected_excludes
    ), f"Should have {expected_excludes}"

    def is_excluded(candidate):
        return any(excluder(candidate) for excluder in excluders)

    # A path is excluded exactly when its ignore file is expected to be active.
    assert is_excluded(pyc_candidate) == ("gitignore" in expected_excludes)
    assert is_excluded(cache_candidate) == ("rayignore" in expected_excludes)


@pytest.mark.parametrize("ignore_gitignore", [True, False])
Expand Down