From 0c66b5442d8b93c40b3fbd741a683cb2c0a4843f Mon Sep 17 00:00:00 2001
From: Tristan Rice
Date: Fri, 11 Mar 2022 12:43:52 -0800
Subject: [PATCH] slurm_scheduler, dir_workspace: add isolated workspaces for
 Slurm (#416)

Summary:
This adds a new `DirWorkspace` that copies the current workspace to a
separate directory for code isolation purposes and integrates it with the
`slurm` scheduler via the `job_dir` runopt. The job directory must not
already exist and will be created; the job's working directory (CWD) is
set to that job_dir.

* `.torchxignore` is used for excluding files from the workspace
* `.torchxslurmjobdirs` is used to track where job directories, and thus
  logs, are located

Pull Request resolved: https://github.com/pytorch/torchx/pull/416

Test Plan: Slurm integ tests + unit tests

Reviewed By: kiukchung

Differential Revision: D34801126

Pulled By: d4l3k

fbshipit-source-id: d53f6f36ad76921289116ee3e8a7c05b0975e594
---
 docs/requirements.txt                       |   1 +
 docs/source/workspace.rst                   |  12 ++
 scripts/slurmtest.sh                        |   6 +-
 torchx/runner/api.py                        |   2 +-
 torchx/runner/test/api_test.py              |   5 +-
 torchx/schedulers/api.py                    |   2 +-
 torchx/schedulers/slurm_scheduler.py        |  73 ++++++++++--
 torchx/schedulers/test/api_test.py          |   4 +-
 .../schedulers/test/slurm_scheduler_test.py | 107 ++++++++++++------
 torchx/workspace/__init__.py                |   2 +-
 torchx/workspace/api.py                     |  66 ++++++++++-
 torchx/workspace/dir_workspace.py           |  51 +++++++++
 torchx/workspace/docker_workspace.py        |  43 ++-----
 torchx/workspace/test/dir_workspace_test.py | 105 +++++++++++++++++
 .../workspace/test/docker_workspace_test.py |   4 +-
 15 files changed, 395 insertions(+), 88 deletions(-)
 create mode 100644 torchx/workspace/dir_workspace.py
 create mode 100644 torchx/workspace/test/dir_workspace_test.py

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 18a23d83e..2ec1751c4 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -7,3 +7,4 @@ papermill
 ipykernel
 nbsphinx
 jupytext
+ipython_genutils
diff --git a/docs/source/workspace.rst b/docs/source/workspace.rst
index d8d851bd0..7d109e1db 100644
--- a/docs/source/workspace.rst
+++ b/docs/source/workspace.rst
@@ -9,6 +9,8 @@ torchx.workspace
 .. autoclass:: Workspace
    :members:
 
+.. autofunction:: walk_workspace
+
 torchx.workspace.docker_workspace
 #######################################
 
@@ -19,3 +21,13 @@ torchx.workspace.docker_workspace
 .. autoclass:: DockerWorkspace
    :members:
    :private-members: _update_app_images, _push_images
+
+torchx.workspace.dir_workspace
+#######################################
+
+
+.. automodule:: torchx.workspace.dir_workspace
+.. currentmodule:: torchx.workspace.dir_workspace
+
+.. 
autoclass:: DirWorkspace
+   :members:
diff --git a/scripts/slurmtest.sh b/scripts/slurmtest.sh
index 10f319f78..116f790ec 100755
--- a/scripts/slurmtest.sh
+++ b/scripts/slurmtest.sh
@@ -15,6 +15,8 @@ DIR="$BASE_DIR/project"
 mkdir "$DIR"
 cd "$DIR"
 
+JOB_DIR="$BASE_DIR/job"
+
 # shellcheck disable=SC1091
 source /opt/slurm/etc/slurm.sh
 sbatch --version
@@ -32,10 +34,12 @@ partition=compute
 time=10
 comment=hello
 nomem=true
+job_dir=$JOB_DIR
 EOT
 
 cat <<EOT > main.py
-print("hello world!")
+import sys
+print("hello world!", file=sys.stderr)
 EOT
 
 APP_ID="$(torchx run --wait --log --scheduler slurm dist.ddp -j 2x1 --script main.py)"
diff --git a/torchx/runner/api.py b/torchx/runner/api.py
index 87996ee12..71f082685 100644
--- a/torchx/runner/api.py
+++ b/torchx/runner/api.py
@@ -283,7 +283,7 @@ def dryrun(
             logger.info(
                 f"Building workspace: {workspace} for role[0]: {role.name}, image: {old_img}"
             )
-            sched.build_workspace_and_update_role(role, workspace)
+            sched.build_workspace_and_update_role(role, workspace, cfg)
             logger.info("Done building workspace")
             if old_img != role.image:
                 logger.info(f"New image: {role.image} built from workspace")
diff --git a/torchx/runner/test/api_test.py b/torchx/runner/test/api_test.py
index d696b8842..fa2cdfa74 100644
--- a/torchx/runner/test/api_test.py
+++ b/torchx/runner/test/api_test.py
@@ -154,7 +154,10 @@ def _cancel_existing(self, app_id: str) -> None:
         pass
 
     def build_workspace_and_update_role(
-        self, role: Role, workspace: str
+        self,
+        role: Role,
+        workspace: str,
+        cfg: Mapping[str, CfgVal],
     ) -> None:
         if self.build_new_img:
             role.image = f"{role.image}_new"
diff --git a/torchx/schedulers/api.py b/torchx/schedulers/api.py
index 126e59fbc..b0ef05c59 100644
--- a/torchx/schedulers/api.py
+++ b/torchx/schedulers/api.py
@@ -109,7 +109,7 @@ def submit(
             sched = self
             assert isinstance(sched, Workspace)
             role = app.roles[0]
-            sched.build_workspace_and_update_role(role, workspace)
+            sched.build_workspace_and_update_role(role, workspace, cfg)
 
         dryrun_info = self.submit_dryrun(app, cfg)
         return self.schedule(dryrun_info)
diff --git a/torchx/schedulers/slurm_scheduler.py b/torchx/schedulers/slurm_scheduler.py
index fce888c21..5d24ae528 100644
--- a/torchx/schedulers/slurm_scheduler.py
+++ b/torchx/schedulers/slurm_scheduler.py
@@ -20,6 +20,7 @@
 from datetime import datetime
 from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple
 
+import torchx
 from torchx.schedulers.api import AppDryRunInfo, DescribeAppResponse, Scheduler, Stream
 from torchx.schedulers.local_scheduler import LogIterator
 from torchx.specs import (
@@ -34,7 +35,9 @@
     macros,
     runopts,
 )
+from torchx.workspace.dir_workspace import DirWorkspace
 
+SLURM_JOB_DIRS = ".torchxslurmjobdirs"
 
 SLURM_STATES: Mapping[str, AppState] = {
     "BOOT_FAIL": AppState.FAILED,
@@ -166,6 +169,7 @@ class SlurmBatchRequest:
 
     cmd: List[str]
     replicas: Dict[str, SlurmReplicaRequest]
+    job_dir: Optional[str]
 
     def materialize(self) -> str:
         """
@@ -186,7 +190,12 @@ def materialize(self) -> str:
         sbatch_opts = "#SBATCH hetjob\n".join(
             f"#SBATCH {group}\n" for group in sbatch_groups
         )
+        cmd = " ".join([shlex.quote(arg) for arg in self.cmd])
         script = f"""#!/bin/bash
+#
+# Generated by TorchX {torchx.__version__}
+# Run with: {cmd}
+#
 {sbatch_opts}
 # exit on error
 set -e
@@ -207,7 +216,7 @@ def __repr__(self) -> str:
 {self.materialize()}"""
 
 
-class SlurmScheduler(Scheduler):
+class SlurmScheduler(Scheduler, DirWorkspace):
     """
     SlurmScheduler is a TorchX scheduling interface to slurm. 
TorchX expects that slurm CLI tools are locally installed and job accounting is enabled. @@ -261,11 +270,8 @@ class SlurmScheduler(Scheduler): Partial support. SlurmScheduler will return job and replica status but does not provide the complete original AppSpec. workspaces: | - Partial support. Typical Slurm usage is from a shared NFS mount - so code will automatically be updated on the workers. - SlurmScheduler does not support programmatic patching via - WorkspaceScheduler. - + If ``job_dir`` is specified the DirWorkspace will create a new + isolated directory with a snapshot of the workspace. """ def __init__(self, session_name: str) -> None: @@ -283,7 +289,9 @@ def run_opts(self) -> runopts: "time", type_=str, default=None, - help="The maximum time the job is allowed to run for.", + help='The maximum time the job is allowed to run for. Formats: \ + "minutes", "minutes:seconds", "hours:minutes:seconds", "days-hours", \ + "days-hours:minutes" or "days-hours:minutes:seconds"', ) opts.add( "nomem", @@ -311,25 +319,43 @@ def run_opts(self) -> runopts: type_=str, help="What events to mail users on.", ) + opts.add( + "job_dir", + type_=str, + help="""The directory to place the job code and outputs. The + directory must not exist and will be created. To enable log + iteration, jobs will be tracked in ``.torchxslurmjobdirs``. + """, + ) return opts def schedule(self, dryrun_info: AppDryRunInfo[SlurmBatchRequest]) -> str: req = dryrun_info.request + job_dir = req.job_dir with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(job_dir or tmpdir, "torchx-sbatch.sh") + if job_dir is not None: + req.cmd += [f"--chdir={job_dir}"] + req.cmd += [path] script = req.materialize() - path = os.path.join(tmpdir, "job.sh") with open(path, "w") as f: f.write(script) - cmd = req.cmd + [path] + p = subprocess.run(req.cmd, stdout=subprocess.PIPE, check=True) + job_id = p.stdout.decode("utf-8").strip() + + if job_dir is not None: + _save_job_dir(job_id, job_dir) - p = subprocess.run(cmd, stdout=subprocess.PIPE, check=True) - return p.stdout.decode("utf-8").strip() + return job_id def _submit_dryrun( self, app: AppDef, cfg: Mapping[str, CfgVal] ) -> AppDryRunInfo[SlurmBatchRequest]: + job_dir = cfg.get("job_dir") + assert job_dir is None or isinstance(job_dir, str), "job_dir must be str" + replicas = {} for role in app.roles: for replica_id in range(role.num_replicas): @@ -351,6 +377,7 @@ def _submit_dryrun( req = SlurmBatchRequest( cmd=cmd, replicas=replicas, + job_dir=job_dir, ) return AppDryRunInfo(req, repr) @@ -443,6 +470,9 @@ def log_iter( ) log_file = f"slurm-{app_id}-{role_name}-{k}.{extension}" + job_dirs = _get_job_dirs() + if app_id in job_dirs: + log_file = os.path.join(job_dirs[app_id], log_file) return LogIterator( app_id, regex or ".*", log_file, self, should_tail=should_tail @@ -453,3 +483,24 @@ def create_scheduler(session_name: str, **kwargs: Any) -> SlurmScheduler: return SlurmScheduler( session_name=session_name, ) + + +def _save_job_dir(job_id: str, job_dir: str) -> None: + with open(SLURM_JOB_DIRS, "at") as f: + f.write(f"{job_id} = {job_dir}\n") + + +def _get_job_dirs() -> Mapping[str, str]: + try: + with open(SLURM_JOB_DIRS, "rt") as f: + lines = f.readlines() + except FileNotFoundError: + return {} + + out = {} + for line in lines: + first, _, second = line.partition("=") + if not first or not second: + continue + out[first.strip()] = second.strip() + return out diff --git a/torchx/schedulers/test/api_test.py b/torchx/schedulers/test/api_test.py index d7343c0b4..e0b7d22c4 100644 
--- a/torchx/schedulers/test/api_test.py +++ b/torchx/schedulers/test/api_test.py @@ -69,7 +69,9 @@ def run_opts(self) -> runopts: def resolve_resource(self, resource: Union[str, Resource]) -> Resource: return NULL_RESOURCE - def build_workspace_and_update_role(self, role: Role, workspace: str) -> None: + def build_workspace_and_update_role( + self, role: Role, workspace: str, cfg: Mapping[str, CfgVal] + ) -> None: role.image = workspace def test_invalid_run_cfg(self) -> None: diff --git a/torchx/schedulers/test/slurm_scheduler_test.py b/torchx/schedulers/test/slurm_scheduler_test.py index 09497434f..75ed7fe6e 100644 --- a/torchx/schedulers/test/slurm_scheduler_test.py +++ b/torchx/schedulers/test/slurm_scheduler_test.py @@ -13,6 +13,7 @@ from typing import Generator from unittest.mock import MagicMock, call, patch +import torchx from torchx import specs from torchx.schedulers.api import DescribeAppResponse, Stream from torchx.schedulers.slurm_scheduler import ( @@ -20,6 +21,8 @@ SlurmReplicaRequest, SlurmScheduler, create_scheduler, + _save_job_dir, + _get_job_dirs, ) @@ -179,7 +182,11 @@ def test_dryrun_multi_role(self) -> None: script = req.materialize() self.assertEqual( script, - """#!/bin/bash + f"""#!/bin/bash +# +# Generated by TorchX {torchx.__version__} +# Run with: sbatch --parsable +# #SBATCH --job-name=a-0 --ntasks-per-node=1 #SBATCH hetjob #SBATCH --job-name=a-1 --ntasks-per-node=1 @@ -348,44 +355,49 @@ def test_describe_running(self, run: MagicMock) -> None: def test_log_iter(self, run: MagicMock) -> None: scheduler = create_scheduler("foo") - with tmp_cwd(): - with open("slurm-54-echo-1.out", "wt") as f: - f.write("hello\nworld\n") - - logs = list( - scheduler.log_iter( - "54", - "echo", - 1, - streams=Stream.STDOUT, - since=datetime.datetime.now(), + for job_dir in ["", "dir"]: + with tmp_cwd(): + if job_dir: + os.mkdir(job_dir) + _save_job_dir("54", job_dir) + + with open(os.path.join(job_dir, "slurm-54-echo-1.out"), "wt") as f: + f.write("hello\nworld\n") + + logs = list( + scheduler.log_iter( + "54", + "echo", + 1, + streams=Stream.STDOUT, + since=datetime.datetime.now(), + ) ) - ) - self.assertEqual(logs, ["hello", "world"]) - - with open("slurm-54-echo-1.err", "wt") as f: - f.write("foo\nbar\n") - - logs = list( - scheduler.log_iter( - "54", - "echo", - 1, - streams=Stream.STDERR, + self.assertEqual(logs, ["hello", "world"]) + + with open(os.path.join(job_dir, "slurm-54-echo-1.err"), "wt") as f: + f.write("foo\nbar\n") + + logs = list( + scheduler.log_iter( + "54", + "echo", + 1, + streams=Stream.STDERR, + ) ) - ) - self.assertEqual(logs, ["foo", "bar"]) + self.assertEqual(logs, ["foo", "bar"]) - # no stream specified should default to STDERR - logs = list( - scheduler.log_iter( - "54", - "echo", - 1, + # no stream specified should default to STDERR + logs = list( + scheduler.log_iter( + "54", + "echo", + 1, + ) ) - ) - self.assertEqual(logs, ["foo", "bar"]) + self.assertEqual(logs, ["foo", "bar"]) with self.assertRaises(ValueError): scheduler.log_iter("54", "echo", 1, streams=Stream.COMBINED) @@ -422,3 +434,30 @@ def test_dryrun_mail(self) -> None: "--mail-type=END", info.request.cmd, ) + + @patch("subprocess.run") + def test_run_workspace_job_dir(self, run: MagicMock) -> None: + with tmp_cwd(): + run.return_value.stdout = b"1234" + scheduler = create_scheduler("foo") + scheduler.submit( + simple_app(), + cfg={ + "job_dir": "dir", + }, + workspace=".", + ) + self.assertIn(("1234", "dir"), _get_job_dirs().items()) + + self.assertEqual(run.call_count, 1) + args, kwargs 
= run.call_args + (args,) = args + self.assertEqual( + args, + [ + "sbatch", + "--parsable", + "--chdir=dir", + "dir/torchx-sbatch.sh", + ], + ) diff --git a/torchx/workspace/__init__.py b/torchx/workspace/__init__.py index 2b48db3b0..132d10f4b 100644 --- a/torchx/workspace/__init__.py +++ b/torchx/workspace/__init__.py @@ -20,4 +20,4 @@ * ``memory://foo-bar/`` an in-memory workspace for notebook/programmatic usage """ -from torchx.workspace.api import Workspace # noqa: F401 +from torchx.workspace.api import Workspace, walk_workspace # noqa: F401 diff --git a/torchx/workspace/api.py b/torchx/workspace/api.py index d5bd868fb..a5d920f4e 100644 --- a/torchx/workspace/api.py +++ b/torchx/workspace/api.py @@ -5,8 +5,16 @@ # LICENSE file in the root directory of this source tree. import abc +import fnmatch +import posixpath +from typing import TYPE_CHECKING, Iterable, Mapping, Tuple -from torchx.specs import Role +from torchx.specs import Role, CfgVal + +if TYPE_CHECKING: + from fsspec import AbstractFileSystem + +TORCHX_IGNORE = ".torchxignore" class Workspace(abc.ABC): @@ -25,7 +33,9 @@ class Workspace(abc.ABC): """ @abc.abstractmethod - def build_workspace_and_update_role(self, role: Role, workspace: str) -> None: + def build_workspace_and_update_role( + self, role: Role, workspace: str, cfg: Mapping[str, CfgVal] + ) -> None: """ Builds the specified ``workspace`` with respect to ``img`` and updates the ``role`` to reflect the built workspace artifacts. @@ -36,3 +46,55 @@ def build_workspace_and_update_role(self, role: Role, workspace: str) -> None: Note: this method mutates the passed ``role``. """ ... + + +def _ignore(s: str, patterns: Iterable[str]) -> bool: + match = False + for pattern in patterns: + if pattern.startswith("!") and fnmatch.fnmatch(s, pattern[1:]): + match = False + elif fnmatch.fnmatch(s, pattern): + match = True + return match + + +def walk_workspace( + fs: "AbstractFileSystem", + path: str, + ignore_name: str = TORCHX_IGNORE, +) -> Iterable[Tuple[str, Iterable[str], Mapping[str, Mapping[str, object]]]]: + """ + walk_workspace walks the filesystem path and applies the ignore rules + specified via ``ignore_name``. + This follows the rules for ``.dockerignore``. + https://docs.docker.com/engine/reference/builder/#dockerignore-file + """ + ignore_patterns = [] + ignore_path = posixpath.join(path, ignore_name) + if fs.exists(ignore_path): + with fs.open(ignore_path, "rt") as f: + lines = f.readlines() + for line in lines: + line, _, _ = line.partition("#") + line = line.strip() + if len(line) == 0 or line == ".": + continue + ignore_patterns.append(line) + + for dir, dirs, files in fs.walk(path, detail=True): + assert isinstance(dir, str), "path must be str" + relpath = posixpath.relpath(dir, path) + if _ignore(relpath, ignore_patterns): + continue + dirs = [ + d for d in dirs if not _ignore(posixpath.join(relpath, d), ignore_patterns) + ] + files = { + file: info + for file, info in files.items() + if not _ignore( + posixpath.join(relpath, file) if relpath != "." else file, + ignore_patterns, + ) + } + yield dir, dirs, files diff --git a/torchx/workspace/dir_workspace.py b/torchx/workspace/dir_workspace.py new file mode 100644 index 000000000..e0f6a752f --- /dev/null +++ b/torchx/workspace/dir_workspace.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+
+import os
+import posixpath
+import shutil
+from typing import Mapping
+
+import fsspec
+from torchx.specs import Role, CfgVal
+from torchx.workspace.api import Workspace, walk_workspace
+
+
+class DirWorkspace(Workspace):
+    def build_workspace_and_update_role(
+        self, role: Role, workspace: str, cfg: Mapping[str, CfgVal]
+    ) -> None:
+        """
+        Creates a new directory specified by ``job_dir`` and copies the workspace
+        into it. The role's image field is set to the ``job_dir``.
+
+        Any files listed in the ``.torchxignore`` file will be skipped.
+        """
+        job_dir = cfg.get("job_dir")
+        if job_dir is None:
+            return
+        assert isinstance(job_dir, str), "job_dir must be str"
+
+        os.mkdir(job_dir)
+        _copy_to_dir(workspace, job_dir)
+        role.image = job_dir
+
+
+def _copy_to_dir(workspace: str, target: str) -> None:
+    fs, path = fsspec.core.url_to_fs(workspace)
+    assert isinstance(path, str), "path must be str"
+
+    for dir, dirs, files in walk_workspace(fs, path):
+        assert isinstance(dir, str), "path must be str"
+        relpath = posixpath.relpath(dir, path)
+        for file, info in files.items():
+            filepath = posixpath.join(
+                target,
+                posixpath.join(relpath, file) if relpath != "." else file,
+            )
+            with fs.open(info["name"], "rb") as src, fsspec.open(filepath, "wb") as dst:
+                shutil.copyfileobj(src, dst)
diff --git a/torchx/workspace/docker_workspace.py b/torchx/workspace/docker_workspace.py
index f92a62f87..3f9782d7f 100644
--- a/torchx/workspace/docker_workspace.py
+++ b/torchx/workspace/docker_workspace.py
@@ -4,18 +4,17 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-import fnmatch
 import io
 import logging
 import posixpath
 import tarfile
 import tempfile
-from typing import IO, TYPE_CHECKING, Optional, Dict, Tuple, Mapping, Iterable
+from typing import IO, TYPE_CHECKING, Optional, Dict, Tuple, Mapping
 
 import fsspec
 import torchx
 from torchx.specs import Role, AppDef, CfgVal
-from torchx.workspace.api import Workspace
+from torchx.workspace.api import Workspace, walk_workspace
 
 if TYPE_CHECKING:
     from docker import DockerClient
@@ -63,7 +62,9 @@ def _docker_client(self) -> "DockerClient":
         self.__docker_client = client
         return client
 
-    def build_workspace_and_update_role(self, role: Role, workspace: str) -> None:
+    def build_workspace_and_update_role(
+        self, role: Role, workspace: str, cfg: Mapping[str, CfgVal]
+    ) -> None:
         """
         Builds a new docker image using the ``role``'s image as the base image
         and updates the ``role``'s image with this newly built docker image id
@@ -176,44 +177,18 @@ def _build_context(img: str, workspace: str) -> IO[bytes]:
     return f
 
 
-def _ignore(s: str, patterns: Iterable[str]) -> bool:
-    match = False
-    for pattern in patterns:
-        if pattern.startswith("!") and fnmatch.fnmatch(s, pattern[1:]):
-            match = False
-        elif fnmatch.fnmatch(s, pattern):
-            match = True
-    return match
-
-
 def _copy_to_tarfile(workspace: str, tf: tarfile.TarFile) -> None:
     fs, path = fsspec.core.url_to_fs(workspace)
     assert isinstance(path, str), "path must be str"
 
-    # load dockerignore
-    # https://docs.docker.com/engine/reference/builder/#dockerignore-file
-    ignore_patterns = []
-    ignore_path = posixpath.join(path, ".dockerignore")
-    if fs.exists(ignore_path):
-        with fs.open(ignore_path, "rt") as f:
-            lines = f.readlines()
-        for line in lines:
-            line, _, _ = line.partition("#")
-            line = line.strip()
-            if len(line) == 0 or line == ".":
-                continue
-            ignore_patterns.append(line)
-
-    for dir, dirs, files in fs.walk(path, detail=True):
+    for dir, 
dirs, files in walk_workspace(fs, path, ".dockerignore"): assert isinstance(dir, str), "path must be str" relpath = posixpath.relpath(dir, path) - if _ignore(relpath, ignore_patterns): - continue for file, info in files.items(): with fs.open(info["name"], "rb") as f: filepath = posixpath.join(relpath, file) if relpath != "." else file - if _ignore(filepath, ignore_patterns): - continue tinfo = tarfile.TarInfo(filepath) - tinfo.size = info["size"] + size = info["size"] + assert isinstance(size, int), "size must be an int" + tinfo.size = size tf.addfile(tinfo, f) diff --git a/torchx/workspace/test/dir_workspace_test.py b/torchx/workspace/test/dir_workspace_test.py new file mode 100644 index 000000000..8e07fe8bc --- /dev/null +++ b/torchx/workspace/test/dir_workspace_test.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import os.path +import tempfile +import unittest + +import fsspec +from torchx.specs import Role +from torchx.workspace.dir_workspace import ( + DirWorkspace, + _copy_to_dir, +) + + +class DirWorkspaceTest(unittest.TestCase): + def test_build_workspace_no_job_dir(self) -> None: + w = DirWorkspace() + role = Role( + name="role", + image="blah", + ) + # should be noop + w.build_workspace_and_update_role(role, workspace="invalid", cfg={}) + self.assertEqual(role.image, "blah") + + def test_build_workspace(self) -> None: + w = DirWorkspace() + role = Role( + name="role", + image="blah", + ) + with tempfile.TemporaryDirectory() as tmpdir: + job_dir = os.path.join(tmpdir, "job") + w.build_workspace_and_update_role( + role, + workspace="invalid", + cfg={ + "job_dir": job_dir, + }, + ) + + def test_torchxignore(self) -> None: + fs = fsspec.filesystem("memory") + files = [ + "ignoredir/bar", + "dir1/bar", + "dir/ignorefileglob1", + "dir/recursive/ignorefileglob2", + "dir/ignorefile", + "ignorefile", + "ignorefilesuffix", + "dir/file", + "foo.sh", + "unignore", + ] + for file in files: + fs.touch("torchxignore/" + file) + with fs.open("torchxignore/.torchxignore", "wt") as f: + f.write( + """ + # comment + + # dirs/files + ignoredir + ignorefile + + # globs + */ignorefileglo*1 + **/ignorefileglob2 + dir? + + # inverse patterns + unignore + !unignore + + # ignore . + . + """ + ) + + _copy_to_dir("memory://torchxignore", "memory://torchxignoredest") + + files = fs.glob("torchxignoredest/*") + fs.glob("torchxignoredest/**/*") + # strip prefix + files = [ + os.path.normpath(file.partition("torchxignoredest/")[2]) for file in files + ] + print(files) + self.assertCountEqual( + files, + { + ".torchxignore", + "dir", + "dir/file", + "dir/ignorefile", + "foo.sh", + "ignorefilesuffix", + "unignore", + }, + ) diff --git a/torchx/workspace/test/docker_workspace_test.py b/torchx/workspace/test/docker_workspace_test.py index 791bd4c08..fa84fc08e 100644 --- a/torchx/workspace/test/docker_workspace_test.py +++ b/torchx/workspace/test/docker_workspace_test.py @@ -43,7 +43,9 @@ def test_docker_workspace(self) -> None: ) workspace = DockerWorkspace() - workspace.build_workspace_and_update_role(role, "memory://test_workspace") + workspace.build_workspace_and_update_role( + role, "memory://test_workspace", {} + ) self.assertNotEqual("busybox", role.image)
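
Usage sketch (not part of the patch): a minimal example of how the new
``job_dir`` runopt can be exercised programmatically, mirroring
``test_run_workspace_job_dir`` above. The app definition and the paths are
illustrative assumptions, and actually submitting requires the Slurm CLI
tools (``sbatch``) to be installed:

    from torchx.schedulers.slurm_scheduler import create_scheduler
    from torchx.specs import AppDef, Role

    scheduler = create_scheduler("example")
    app = AppDef(
        name="demo",
        # hypothetical single-role app; for slurm the image is a directory
        roles=[Role(name="trainer", image=".", entrypoint="main.py", num_replicas=1)],
    )
    # "job_dir" must not exist yet: submit() snapshots the workspace into it
    # (honoring .torchxignore), invokes sbatch with --chdir=<job_dir>, and
    # records the job id -> job_dir mapping in .torchxslurmjobdirs so that
    # log_iter() can later find slurm-<job_id>-<role>-<replica>.out there.
    app_id = scheduler.submit(app, cfg={"job_dir": "/tmp/demo-job"}, workspace=".")

Because ``SlurmScheduler`` now mixes in ``DirWorkspace``, omitting ``job_dir``
from the cfg keeps the previous behavior: the workspace build is a no-op and
the job runs from the shared filesystem as before.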