release/cluster_tests/workloads/tune_scale_up_down.py (6 changes: 3 additions & 3 deletions)

@@ -27,20 +27,20 @@

 import ray

-from ray import train, tune
+from ray import tune
Contributor comment (severity: high):

This change removes `train` from the import statement. Ensure that `train` is not used anywhere else in this file, as this could lead to errors.



 def train_fn(config):
     this_node_ip = ray.util.get_node_ip_address()
     if config["head_node_ip"] == this_node_ip:
         # On the head node, run for 30 minutes
         for i in range(30):
-            train.report({"metric": i})
+            tune.report({"metric": i})
             time.sleep(60)
     else:
         # On worker nodes, run for 3 minutes
         for i in range(3):
-            train.report({"metric": i})
+            tune.report({"metric": i})
             time.sleep(60)
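As a sanity check on the migration flagged above, here is a minimal sketch of driving a function trainable like train_fn with Ray Tune's Tuner, assuming the current ray.tune API in which tune.report replaces train.report inside function trainables; the trainable body, param_space, and num_samples below are illustrative and not taken from this repo:

import ray
from ray import tune


def short_train_fn(config):
    # Illustrative trainable mirroring the worker branch of train_fn above:
    # report a metric once per iteration (sleeps omitted for brevity).
    for i in range(config["num_reports"]):
        tune.report({"metric": i})


ray.init()
tuner = tune.Tuner(
    short_train_fn,
    param_space={"num_reports": 3},
    tune_config=tune.TuneConfig(num_samples=2),
)
results = tuner.fit()
print(results.get_best_result(metric="metric", mode="max").metrics)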


Second changed file:

@@ -28,8 +28,8 @@
 import gc

 import ray
-from ray import train
-from ray.train import Checkpoint, RunConfig, FailureConfig, CheckpointConfig
+from ray import tune
+from ray.tune import Checkpoint, RunConfig, FailureConfig, CheckpointConfig
Contributor comment (severity: high):

This change updates the import statement to import `Checkpoint`, `RunConfig`, `FailureConfig`, and `CheckpointConfig` from `ray.tune` instead of `ray.train`. Verify that this change aligns with the intended behavior and that these classes are correctly implemented and available in the current context.

 from ray.tune.tune_config import TuneConfig
 from ray.tune.tuner import Tuner

@@ -43,12 +43,12 @@

 def objective(config):
     start_iteration = 0
-    checkpoint = train.get_checkpoint()
+    checkpoint = tune.get_checkpoint()
     # Ensure that after the node killer warmup time, we always have
     # a checkpoint to restore from.
     if (time.monotonic() - config["start_time"]) >= config["warmup_time_s"]:
         assert checkpoint
-    checkpoint = train.get_checkpoint()
+    checkpoint = tune.get_checkpoint()
     if checkpoint:
         with checkpoint.as_directory() as checkpoint_dir:
             with open(os.path.join(checkpoint_dir, "ckpt.pkl"), "rb") as f:
@@ -61,7 +61,7 @@ def objective(config):
         with tempfile.TemporaryDirectory() as tmpdir:
             with open(os.path.join(tmpdir, "ckpt.pkl"), "wb") as f:
                 pickle.dump(dct, f)
-            train.report(dct, checkpoint=Checkpoint.from_directory(tmpdir))
+            tune.report(dct, checkpoint=Checkpoint.from_directory(tmpdir))


 def main(bucket_uri: str):
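Regarding the second review comment: the names now imported from ray.tune are typically wired together roughly as in the sketch below. This assumes ray.tune re-exports Checkpoint, RunConfig, FailureConfig, and CheckpointConfig (as the diff implies); the storage path, failure, and checkpoint settings are illustrative and not taken from the hidden parts of this file:

import os
import pickle
import tempfile

from ray import tune
from ray.tune import Checkpoint, CheckpointConfig, FailureConfig, RunConfig
from ray.tune.tune_config import TuneConfig
from ray.tune.tuner import Tuner


def toy_objective(config):
    # Same save/restore pattern as objective() above, shortened.
    checkpoint = tune.get_checkpoint()
    start = 0
    if checkpoint:
        with checkpoint.as_directory() as checkpoint_dir:
            with open(os.path.join(checkpoint_dir, "ckpt.pkl"), "rb") as f:
                start = pickle.load(f)["iteration"] + 1
    for i in range(start, 5):
        with tempfile.TemporaryDirectory() as tmpdir:
            with open(os.path.join(tmpdir, "ckpt.pkl"), "wb") as f:
                pickle.dump({"iteration": i}, f)
            tune.report({"iteration": i}, checkpoint=Checkpoint.from_directory(tmpdir))


tuner = Tuner(
    toy_objective,
    tune_config=TuneConfig(num_samples=2),
    run_config=RunConfig(
        storage_path="/tmp/tune_sketch",  # illustrative; the real test uses a bucket URI
        failure_config=FailureConfig(max_failures=-1),
        checkpoint_config=CheckpointConfig(num_to_keep=2),
    ),
)
tuner.fit()

FailureConfig(max_failures=-1) retries failed trials indefinitely, which matches the fault-tolerance intent suggested by the node-killer comment in objective().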