Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
0fe4366
initial commit for tune cli
richardliaw Feb 7, 2019
5f1d5a0
Fix TrialRunner._start_time
hartikainen Feb 8, 2019
d4a7b9a
Clean tune cli
hartikainen Feb 8, 2019
400d382
Add explicit info keys for the experiment/project infos
hartikainen Feb 8, 2019
65f718c
small
andrewztan Feb 8, 2019
5b34ff3
Merge pull request #10 from hartikainen/tune_cli
richardliaw Feb 9, 2019
3088016
makes ls command print pretty table
andrewztan Feb 14, 2019
7a97f6a
makes ls command print pretty table
andrewztan Feb 14, 2019
5f44fc9
Merge branch 'tune_cli' into tune_cli
andrewztan Feb 14, 2019
e89fd3e
shortened output for logdir
andrewztan Feb 14, 2019
c8acc11
changed lsx to tabulate
andrewztan Feb 16, 2019
e168652
added sorting for ls
andrewztan Feb 17, 2019
e10425e
fixed lsx and lint
andrewztan Feb 21, 2019
6841a59
faster importing
richardliaw Feb 22, 2019
b759527
Merge pull request #12 from andrewztan/tune_cli
richardliaw Feb 22, 2019
9799c47
Merge branch 'master' into tune_cli
richardliaw Feb 26, 2019
c7d0f5c
Merge branch 'tune_cli' of github.com:richardliaw/ray into tune_cli
richardliaw Feb 26, 2019
63f6bdb
terminal resizing
andrewztan Feb 27, 2019
05b6b56
Merge branch 'tune_cli' of github.com:andrewztan/ray into tune_cli
andrewztan Feb 27, 2019
e238437
lint
andrewztan Feb 27, 2019
af9945c
started cli docs
andrewztan Feb 28, 2019
71735e0
Merge branch 'upstream_master' into tune_cli
andrewztan Feb 28, 2019
8e8b3db
added docs
andrewztan Mar 1, 2019
7d9a239
Merge branch 'master' into tune_cli
richardliaw Mar 1, 2019
e9fef98
Merge branch 'tune_cli' into tune_cli
andrewztan Mar 1, 2019
a8d0e83
replaced python scripts.py with tune in docs
andrewztan Mar 1, 2019
277bcd4
Merge branch 'tune_cli' of git://github.com/richardliaw/ray into tune…
andrewztan Mar 1, 2019
2bfe80d
Merge branch 'tune_cli' of github.com:andrewztan/ray into tune_cli
andrewztan Mar 1, 2019
56b880e
Merge pull request #13 from andrewztan/tune_cli
richardliaw Mar 1, 2019
e345c0d
Merge branch 'master' into tune_cli
richardliaw Mar 3, 2019
d4ea436
Formatting, Flatten
richardliaw Mar 3, 2019
a83d2ab
Format commands, fix up privatization
richardliaw Mar 5, 2019
d99186c
Clean up ls usage
richardliaw Mar 6, 2019
ef140c4
fix up lsx
richardliaw Mar 6, 2019
89c8374
Merge branch 'master' into tune_cli
richardliaw Mar 6, 2019
a332216
TODO: Tests
richardliaw Mar 6, 2019
f646f13
Add tests and docs
richardliaw Mar 7, 2019
bdc76f1
fix import
richardliaw Mar 7, 2019
6d3e326
Merge branch 'master' into tune_cli
richardliaw Mar 7, 2019
e76a7d3
fix
richardliaw Mar 7, 2019
9dc3e67
Merge branch 'tune_cli' of github.com:richardliaw/ray into tune_cli
richardliaw Mar 7, 2019
eb96139
Add better time-formatting
richardliaw Mar 7, 2019
31685ab
Fix up commands
richardliaw Mar 7, 2019
3daf811
fix for py2
richardliaw Mar 7, 2019
36a494f
timestamp
richardliaw Mar 7, 2019
7b03b69
Address comments and fix backward compat.
richardliaw Mar 7, 2019
6bb04b9
Merge branch 'master' into tune_cli
richardliaw Mar 8, 2019
db56b72
missed
richardliaw Mar 8, 2019
ee1e48f
fix tests
richardliaw Mar 8, 2019
60a7672
Fix tests
richardliaw Mar 8, 2019
c097b45
Merge branch 'master' into tune_cli
richardliaw Mar 8, 2019
7309938
travis
richardliaw Mar 8, 2019
1cd9d3c
shorten
richardliaw Mar 8, 2019
e512c63
fix py2
richardliaw Mar 8, 2019
207e0f7
Merge branch 'tune_cli' of github.com:richardliaw/ray into tune_cli
richardliaw Mar 8, 2019
b33ff73
Merge branch 'master' into tune_cli
richardliaw Mar 8, 2019
d8babd4
Fix trial
richardliaw Mar 8, 2019
739d9b4
fix
richardliaw Mar 8, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions ci/travis/install-dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ if [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "linux" ]]; then
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
pip install -q scipy tensorflow cython==0.29.0 gym opencv-python-headless pyyaml pandas==0.23.4 requests \
feather-format lxml openpyxl xlrd py-spy setproctitle faulthandler pytest-timeout mock flaky networkx
feather-format lxml openpyxl xlrd py-spy setproctitle faulthandler pytest-timeout mock flaky networkx tabulate
elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then
sudo apt-get update
sudo apt-get install -y python-dev python-numpy build-essential curl unzip tmux gdb
Expand All @@ -34,7 +34,7 @@ elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
pip install -q scipy tensorflow cython==0.29.0 gym opencv-python-headless pyyaml pandas==0.23.4 requests \
feather-format lxml openpyxl xlrd py-spy setproctitle pytest-timeout flaky networkx
feather-format lxml openpyxl xlrd py-spy setproctitle pytest-timeout flaky networkx tabulate
elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then
# check that brew is installed
which -s brew
Expand All @@ -50,7 +50,7 @@ elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
pip install -q cython==0.29.0 tensorflow gym opencv-python-headless pyyaml pandas==0.23.4 requests \
feather-format lxml openpyxl xlrd py-spy setproctitle faulthandler pytest-timeout mock flaky networkx
feather-format lxml openpyxl xlrd py-spy setproctitle faulthandler pytest-timeout mock flaky networkx tabulate
elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "macosx" ]]; then
# check that brew is installed
which -s brew
Expand All @@ -66,7 +66,7 @@ elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "macosx" ]]; then
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
pip install -q cython==0.29.0 tensorflow gym opencv-python-headless pyyaml pandas==0.23.4 requests \
feather-format lxml openpyxl xlrd py-spy setproctitle pytest-timeout flaky networkx
feather-format lxml openpyxl xlrd py-spy setproctitle pytest-timeout flaky networkx tabulate
elif [[ "$LINT" == "1" ]]; then
sudo apt-get update
sudo apt-get install -y build-essential curl unzip
Expand Down
44 changes: 44 additions & 0 deletions doc/source/tune-usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,50 @@ And stopping a trial (``PUT /trials/:id``):
curl -X PUT http://<address>:<port>/trials/<trial_id>


Tune CLI (Experimental)
-----------------------

``tune`` has an easy-to-use command line interface (CLI) to manage and monitor your experiments on Ray. To do this, verify that you have the ``tabulate`` library installed:

.. code-block:: bash

$ pip install tabulate

Here are a few examples of command line calls.

- ``tune list-trials``: List tabular information about trials within an experiment. Add the ``--sort`` flag to sort the output by specific columns.

.. code-block:: bash

$ tune list-trials [EXPERIMENT_DIR]

+------------------+-----------------------+------------+
| trainable_name | experiment_tag | trial_id |
|------------------+-----------------------+------------|
| MyTrainableClass | 0_height=40,width=37 | 87b54a1d |
| MyTrainableClass | 1_height=21,width=70 | 23b89036 |
| MyTrainableClass | 2_height=99,width=90 | 518dbe95 |
| MyTrainableClass | 3_height=54,width=21 | 7b99a28a |
| MyTrainableClass | 4_height=90,width=69 | ae4e02fb |
+------------------+-----------------------+------------+
Dropped columns: ['status', 'last_update_time']

- ``tune list-experiments``: List tabular information about experiments within a project. Add the ``--sort`` flag to sort the output by specific columns.

.. code-block:: bash

$ tune list-experiments [PROJECT_DIR]

+----------------------+----------------+------------------+---------------------+
| name | total_trials | running_trials | terminated_trials |
|----------------------+----------------+------------------+---------------------|
| pbt_test | 10 | 0 | 0 |
| test | 1 | 0 | 0 |
| hyperband_test | 1 | 0 | 1 |
+----------------------+----------------+------------------+---------------------+
Dropped columns: ['error_trials', 'last_updated']


Further Questions or Issues?
----------------------------

Expand Down
232 changes: 232 additions & 0 deletions python/ray/tune/commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import glob
import json
import logging
import os
import sys
import subprocess
from datetime import datetime

import pandas as pd
from ray.tune.util import flatten_dict
from ray.tune.result import TRAINING_ITERATION, MEAN_ACCURACY, MEAN_LOSS
from ray.tune.trial import Trial
try:
from tabulate import tabulate
except ImportError:
tabulate = None

logger = logging.getLogger(__name__)

TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S (%A)"

DEFAULT_EXPERIMENT_INFO_KEYS = (
"trainable_name",
"experiment_tag",
"trial_id",
"status",
"last_update_time",
)

DEFAULT_RESULT_KEYS = (TRAINING_ITERATION, MEAN_ACCURACY, MEAN_LOSS)

DEFAULT_PROJECT_INFO_KEYS = (
"name",
"total_trials",
"running_trials",
"terminated_trials",
"error_trials",
"last_updated",
)

# Determine the terminal size once at import time so that wide tables can
# be truncated to fit. Fall back to a 100x100 default when there is no
# controlling terminal (CI, pipes: `stty` exits non-zero), when `stty` is
# not available at all (e.g. Windows: OSError/FileNotFoundError), or when
# its output cannot be parsed into two integers (ValueError).
try:
    TERM_HEIGHT, TERM_WIDTH = subprocess.check_output(['stty', 'size']).split()
    TERM_HEIGHT, TERM_WIDTH = int(TERM_HEIGHT), int(TERM_WIDTH)
except (subprocess.CalledProcessError, OSError, ValueError):
    TERM_HEIGHT, TERM_WIDTH = 100, 100


def _check_tabulate():
    """Raise an informative ImportError if tabulate is not installed."""
    if tabulate is not None:
        return
    raise ImportError(
        "Tabulate not installed. Please run `pip install tabulate`.")


def print_format_output(dataframe):
    """Prints output of given dataframe to fit into terminal.

    Columns are added one at a time, in the dataframe's existing order
    (i.e. info-key priority); fully-empty columns are skipped, and as soon
    as the rendered table would exceed the terminal width, the current and
    all remaining columns are dropped.

    Returns:
        table (pd.DataFrame): Final outputted dataframe.
        dropped_cols (list): Columns dropped due to terminal size.
        empty_cols (list): Empty columns (dropped on default).
    """
    display_df = pd.DataFrame()
    dropped_cols = []
    empty_cols = []
    # Column display priority is based on the info_keys passed in.
    for position, column in enumerate(dataframe):
        column_values = dataframe[column]
        if column_values.isnull().all():
            # Fully-empty columns never make it into the printed table.
            empty_cols.append(column)
            continue

        display_df[column] = column_values
        candidate = tabulate(display_df, headers="keys", tablefmt="psql")
        # The first line of a psql-style table spans its full width.
        if str(candidate).index('\n') > TERM_WIDTH:
            # Too wide: drop this column and everything after it.
            display_df.drop(column, axis=1, inplace=True)
            dropped_cols.extend(list(dataframe.columns)[position:])
            break

    table = tabulate(
        display_df, headers="keys", tablefmt="psql", showindex="never")

    print(table)
    if dropped_cols:
        print("Dropped columns:", dropped_cols)
        print("Please increase your terminal size to view remaining columns.")
    if empty_cols:
        print("Empty columns:", empty_cols)

    return table, dropped_cols, empty_cols


def _get_experiment_state(experiment_path, exit_on_fail=False):
experiment_path = os.path.expanduser(experiment_path)
experiment_state_paths = glob.glob(
os.path.join(experiment_path, "experiment_state*.json"))
if not experiment_state_paths:
if exit_on_fail:
print("No experiment state found!")
sys.exit(0)
else:
return
experiment_filename = max(list(experiment_state_paths))

with open(experiment_filename) as f:
experiment_state = json.load(f)
return experiment_state


def list_trials(experiment_path,
                sort=None,
                info_keys=DEFAULT_EXPERIMENT_INFO_KEYS,
                result_keys=DEFAULT_RESULT_KEYS):
    """Lists trials in the directory subtree starting at the given path.

    Loads the most recent ``experiment_state*.json`` found under
    ``experiment_path`` and prints one table row per trial via
    ``print_format_output``.

    Args:
        experiment_path (str): Directory where trials are located.
            Corresponds to Experiment.local_dir/Experiment.name.
        sort (str): Key to sort by.
        info_keys (list): Keys that are displayed.
        result_keys (list): Keys of last result that are displayed.

    Raises:
        ImportError: If tabulate is not installed.
        KeyError: If ``sort`` is not one of the displayed columns.
    """
    _check_tabulate()
    # exit_on_fail=True: terminates the process if no state file is found.
    experiment_state = _get_experiment_state(
        experiment_path, exit_on_fail=True)

    checkpoint_dicts = experiment_state["checkpoints"]
    # Flatten nested checkpoint dicts so nested values become
    # "outer:inner"-style columns usable below.
    checkpoint_dicts = [flatten_dict(g) for g in checkpoint_dicts]
    checkpoints_df = pd.DataFrame(checkpoint_dicts)

    # Last-result metrics live under the "last_result:" prefix after
    # flattening.
    result_keys = ["last_result:{}".format(k) for k in result_keys]
    # Keep only requested columns that actually exist in the data.
    col_keys = [
        k for k in list(info_keys) + result_keys if k in checkpoints_df
    ]
    checkpoints_df = checkpoints_df[col_keys]

    if "last_update_time" in checkpoints_df:
        # Treat +/-inf timestamps as missing so they are dropped instead of
        # being passed to datetime.fromtimestamp.
        with pd.option_context('mode.use_inf_as_null', True):
            datetime_series = checkpoints_df["last_update_time"].dropna()

        # Render epoch seconds as human-readable timestamps.
        datetime_series = datetime_series.apply(
            lambda t: datetime.fromtimestamp(t).strftime(TIMESTAMP_FORMAT))
        checkpoints_df["last_update_time"] = datetime_series

    if "logdir" in checkpoints_df:
        # logdir often too verbose to view in table, so drop experiment_path
        # NOTE(review): Series.str.replace treats the pattern as a regex by
        # default, so a path containing regex metacharacters may not be
        # stripped as intended — confirm.
        checkpoints_df["logdir"] = checkpoints_df["logdir"].str.replace(
            experiment_path, '')

    if sort:
        if sort not in checkpoints_df:
            raise KeyError("Sort Index '{}' not in: {}".format(
                sort, list(checkpoints_df)))
        checkpoints_df = checkpoints_df.sort_values(by=sort)

    print_format_output(checkpoints_df)


def list_experiments(project_path,
                     sort=None,
                     info_keys=DEFAULT_PROJECT_INFO_KEYS):
    """Lists experiments in the directory subtree.

    Scans the immediate subdirectories of ``project_path``, loads each
    one's experiment state (if any), and prints a summary table with one
    row per experiment via ``print_format_output``.

    Args:
        project_path (str): Directory where experiments are located.
            Corresponds to Experiment.local_dir.
        sort (str): Key to sort by.
        info_keys (list): Keys that are displayed.

    Raises:
        ImportError: If tabulate is not installed.
        KeyError: If ``sort`` is not one of the displayed columns.
    """
    _check_tabulate()
    # Only the first level of subdirectories is considered; each one is a
    # candidate experiment directory.
    base, experiment_folders, _ = next(os.walk(project_path))

    experiment_data_collection = []

    for experiment_dir in experiment_folders:
        experiment_state = _get_experiment_state(
            os.path.join(base, experiment_dir))
        if not experiment_state:
            # Not an experiment directory (no experiment_state*.json).
            logger.debug("No experiment state found in %s", experiment_dir)
            continue

        checkpoints = pd.DataFrame(experiment_state["checkpoints"])
        runner_data = experiment_state["runner_data"]

        # Format time-based values.
        time_values = {
            "start_time": runner_data.get("_start_time"),
            "last_updated": experiment_state.get("timestamp"),
        }

        # Missing timestamps stay None rather than being formatted.
        formatted_time_values = {
            key: datetime.fromtimestamp(val).strftime(TIMESTAMP_FORMAT)
            if val else None
            for key, val in time_values.items()
        }

        # Per-status trial counts derived from the checkpoint table.
        experiment_data = {
            "name": experiment_dir,
            "total_trials": checkpoints.shape[0],
            "running_trials": (checkpoints["status"] == Trial.RUNNING).sum(),
            "terminated_trials": (
                checkpoints["status"] == Trial.TERMINATED).sum(),
            "error_trials": (checkpoints["status"] == Trial.ERROR).sum(),
        }
        experiment_data.update(formatted_time_values)
        experiment_data_collection.append(experiment_data)

    if not experiment_data_collection:
        print("No experiments found!")
        sys.exit(0)

    info_df = pd.DataFrame(experiment_data_collection)
    # Restrict to requested columns that are present in the data.
    col_keys = [k for k in list(info_keys) if k in info_df]

    if not col_keys:
        print("None of keys {} in experiment data!".format(info_keys))
        sys.exit(0)

    info_df = info_df[col_keys]

    if sort:
        if sort not in info_df:
            raise KeyError("Sort Index '{}' not in: {}".format(
                sort, list(info_df)))
        info_df = info_df.sort_values(by=sort)

    print_format_output(info_df)
4 changes: 2 additions & 2 deletions python/ray/tune/examples/mnist_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@
parser.add_argument(
'--epochs',
type=int,
default=10,
default=1,
metavar='N',
help='number of epochs to train (default: 10)')
help='number of epochs to train (default: 1)')
parser.add_argument(
'--lr',
type=float,
Expand Down
4 changes: 2 additions & 2 deletions python/ray/tune/examples/mnist_pytorch_trainable.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@
parser.add_argument(
'--epochs',
type=int,
default=10,
default=1,
metavar='N',
help='number of epochs to train (default: 10)')
help='number of epochs to train (default: 1)')
parser.add_argument(
'--lr',
type=float,
Expand Down
20 changes: 11 additions & 9 deletions python/ray/tune/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,8 @@

logger = logging.getLogger(__name__)

try:
import tensorflow as tf
use_tf150_api = (distutils.version.LooseVersion(tf.VERSION) >=
distutils.version.LooseVersion("1.5.0"))
except ImportError:
tf = None
use_tf150_api = True
logger.warning("Couldn't import TensorFlow - "
"disabling TensorBoard logging.")
tf = None
use_tf150_api = True


class Logger(object):
Expand Down Expand Up @@ -121,6 +114,15 @@ def to_tf_values(result, path):

class TFLogger(Logger):
    def _init(self):
        # TensorFlow is imported lazily here, rather than at module import
        # time, so that importing this module stays fast when TF logging is
        # not actually used.
        try:
            global tf, use_tf150_api
            import tensorflow
            tf = tensorflow
            use_tf150_api = (distutils.version.LooseVersion(tf.VERSION) >=
                             distutils.version.LooseVersion("1.5.0"))
        except ImportError:
            logger.warning("Couldn't import TensorFlow - "
                           "disabling TensorBoard logging.")
        # NOTE(review): if the import above failed, `tf` is still None and
        # the following call raises AttributeError despite the warning —
        # confirm this is the intended failure mode.
        self._file_writer = tf.summary.FileWriter(self.logdir)

def on_result(self, result):
Expand Down
9 changes: 9 additions & 0 deletions python/ray/tune/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,15 @@
# (Auto-filled) The pid of the training process.
PID = "pid"

# (Optional) Mean reward for current training iteration
EPISODE_REWARD_MEAN = "episode_reward_mean"

# (Optional) Mean loss for training iteration
MEAN_LOSS = "mean_loss"

# (Optional) Mean accuracy for training iteration
MEAN_ACCURACY = "mean_accuracy"

# Number of episodes in this iteration.
EPISODES_THIS_ITER = "episodes_this_iter"

Expand Down
Loading