From c801000fdb4ac138671d156f67307c7d97f274d1 Mon Sep 17 00:00:00 2001 From: Alok Singh Date: Sat, 28 Apr 2018 01:04:41 -0700 Subject: [PATCH 1/5] Use set/dict literal syntax Ran code through [pyupgrade](https://github.com/asottile/pyupgrade). This is supported in every Python version 2.7+. --- examples/resnet/resnet_main.py | 2 +- python/ray/cloudpickle/cloudpickle.py | 4 ++-- python/ray/common/test/test.py | 2 +- python/ray/dataframe/groupby.py | 2 +- python/ray/experimental/tfutils.py | 2 +- python/ray/plasma/test/test.py | 14 +++++++------- python/ray/tune/test/trial_scheduler_test.py | 16 ++++++++-------- python/ray/tune/trial_runner.py | 2 +- python/ray/worker.py | 4 ++-- test/actor_test.py | 12 ++++++------ test/runtest.py | 6 +++--- test/stress_tests.py | 4 ++-- 12 files changed, 35 insertions(+), 35 deletions(-) diff --git a/examples/resnet/resnet_main.py b/examples/resnet/resnet_main.py index 4b42fbee93cf..5175b6de374b 100644 --- a/examples/resnet/resnet_main.py +++ b/examples/resnet/resnet_main.py @@ -230,7 +230,7 @@ def train(): # testing task with the current weights every 200 steps. acc = ray.get(acc_id) acc_id = test_actor.accuracy.remote(weight_id, step) - print("Step {0}: {1:.6f}".format(step - 200, acc)) + print("Step {}: {:.6f}".format(step - 200, acc)) except KeyboardInterrupt: pass diff --git a/python/ray/cloudpickle/cloudpickle.py b/python/ray/cloudpickle/cloudpickle.py index e5aab0591f57..2dc91bd329ed 100644 --- a/python/ray/cloudpickle/cloudpickle.py +++ b/python/ray/cloudpickle/cloudpickle.py @@ -572,8 +572,8 @@ def extract_code_globals(cls, co): # PyPy "builtin-code" object out_names = set() else: - out_names = set(names[oparg] - for op, oparg in _walk_global_ops(co)) + out_names = {names[oparg] + for op, oparg in _walk_global_ops(co)} # see if nested function have any global refs if co.co_consts: diff --git a/python/ray/common/test/test.py b/python/ray/common/test/test.py index 5892d289fa73..4ae867ff9525 100644 --- a/python/ray/common/test/test.py +++ b/python/ray/common/test/test.py @@ -133,7 +133,7 @@ def test_hashability(self): x = random_object_id() y = random_object_id() {x: y} - set([x, y]) + {x, y} class TestTask(unittest.TestCase): diff --git a/python/ray/dataframe/groupby.py b/python/ray/dataframe/groupby.py index 892bc8f74e19..94c8c474769a 100644 --- a/python/ray/dataframe/groupby.py +++ b/python/ray/dataframe/groupby.py @@ -106,7 +106,7 @@ def tshift(self): @property def groups(self): - return dict([(k, pd.Index(v)) for k, v in self._keys_and_values]) + return {k: pd.Index(v) for k, v in self._keys_and_values} def min(self, **kwargs): return self._apply_agg_function(lambda df: df.min(**kwargs)) diff --git a/python/ray/experimental/tfutils.py b/python/ray/experimental/tfutils.py index 10d5fb4bc308..fc33900b8a27 100644 --- a/python/ray/experimental/tfutils.py +++ b/python/ray/experimental/tfutils.py @@ -49,7 +49,7 @@ def __init__(self, loss, sess=None, input_variables=None): self.sess = sess queue = deque([loss]) variable_names = [] - explored_inputs = set([loss]) + explored_inputs = {loss} # We do a BFS on the dependency graph of the input function to find # the variables. 
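The hunks above and below replace list-building calls with literal syntax. As a standalone sketch of the set-comprehension rewrite pyupgrade performs here (illustrative names, not code from this diff):

    # A generator expression fed to set() versus a set comprehension:
    # both build the same set; the comprehension skips the name lookup
    # and the extra call, and reads as a single construct.
    names = ["a", "b", "a"]
    old_style = set(n.upper() for n in names)
    new_style = {n.upper() for n in names}
    assert old_style == new_style == {"A", "B"}

Note that the cloudpickle copy of this rewrite is reverted later in the series to stay close to upstream cloudpickle.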
diff --git a/python/ray/plasma/test/test.py b/python/ray/plasma/test/test.py index 8b0d62fe1d2a..0ec424d6a71f 100644 --- a/python/ray/plasma/test/test.py +++ b/python/ray/plasma/test/test.py @@ -297,7 +297,7 @@ def test_wait(self): self.client1.seal(obj_id1) ready, waiting = self.client1.wait( [obj_id1], timeout=100, num_returns=1) - self.assertEqual(set(ready), set([obj_id1])) + self.assertEqual(set(ready), {obj_id1}) self.assertEqual(waiting, []) # Test wait if only one object available and only one object waited @@ -307,8 +307,8 @@ def test_wait(self): # Don't seal. ready, waiting = self.client1.wait( [obj_id2, obj_id1], timeout=100, num_returns=1) - self.assertEqual(set(ready), set([obj_id1])) - self.assertEqual(set(waiting), set([obj_id2])) + self.assertEqual(set(ready), {obj_id1}) + self.assertEqual(set(waiting), {obj_id2}) # Test wait if object is sealed later. obj_id3 = random_object_id() @@ -321,14 +321,14 @@ def finish(): t.start() ready, waiting = self.client1.wait( [obj_id3, obj_id2, obj_id1], timeout=1000, num_returns=2) - self.assertEqual(set(ready), set([obj_id1, obj_id3])) - self.assertEqual(set(waiting), set([obj_id2])) + self.assertEqual(set(ready), {obj_id1, obj_id3}) + self.assertEqual(set(waiting), {obj_id2}) # Test if the appropriate number of objects is shown if some objects # are not ready. ready, waiting = self.client1.wait([obj_id3, obj_id2, obj_id1], 100, 3) - self.assertEqual(set(ready), set([obj_id1, obj_id3])) - self.assertEqual(set(waiting), set([obj_id2])) + self.assertEqual(set(ready), {obj_id1, obj_id3}) + self.assertEqual(set(waiting), {obj_id2}) # Don't forget to seal obj_id2. self.client1.seal(obj_id2) diff --git a/python/ray/tune/test/trial_scheduler_test.py b/python/ray/tune/test/trial_scheduler_test.py index b008af3c7d6a..a15448db79c9 100644 --- a/python/ray/tune/test/trial_scheduler_test.py +++ b/python/ray/tune/test/trial_scheduler_test.py @@ -688,36 +688,36 @@ def assertProduces(fn, values): # Categorical case assertProduces( lambda: explore({"v": 4}, {"v": [3, 4, 8, 10]}, 0.0, lambda x: x), - set([3, 8])) + {3, 8}) assertProduces( lambda: explore({"v": 3}, {"v": [3, 4, 8, 10]}, 0.0, lambda x: x), - set([3, 4])) + {3, 4}) assertProduces( lambda: explore({"v": 10}, {"v": [3, 4, 8, 10]}, 0.0, lambda x: x), - set([8, 10])) + {8, 10}) assertProduces( lambda: explore({"v": 7}, {"v": [3, 4, 8, 10]}, 0.0, lambda x: x), - set([3, 4, 8, 10])) + {3, 4, 8, 10}) assertProduces( lambda: explore({"v": 4}, {"v": [3, 4, 8, 10]}, 1.0, lambda x: x), - set([3, 4, 8, 10])) + {3, 4, 8, 10}) # Continuous case assertProduces( lambda: explore( {"v": 100}, {"v": lambda: random.choice([10, 100])}, 0.0, lambda x: x), - set([80, 120])) + {80, 120}) assertProduces( lambda: explore( {"v": 100.0}, {"v": lambda: random.choice([10, 100])}, 0.0, lambda x: x), - set([80.0, 120.0])) + {80.0, 120.0}) assertProduces( lambda: explore( {"v": 100.0}, {"v": lambda: random.choice([10, 100])}, 1.0, lambda x: x), - set([10.0, 100.0])) + {10.0, 100.0}) def testYieldsTimeToOtherTrials(self): pbt, runner = self.basicSetup() diff --git a/python/ray/tune/trial_runner.py b/python/ray/tune/trial_runner.py index 56474c119495..3f07e7bb51ab 100644 --- a/python/ray/tune/trial_runner.py +++ b/python/ray/tune/trial_runner.py @@ -172,7 +172,7 @@ def debug_string(self, max_debug=MAX_DEBUG_TRIALS): if max_debug == start_num: break - for local_dir in sorted(set([t.local_dir for t in self._trials])): + for local_dir in sorted({t.local_dir for t in self._trials}): messages.append("Result logdir: 
{}".format(local_dir)) for state, trials in sorted(states.items()): limit = limit_per_state[state] diff --git a/python/ray/worker.py b/python/ray/worker.py index a12f93a541b1..2ed390cd7277 100644 --- a/python/ray/worker.py +++ b/python/ray/worker.py @@ -464,9 +464,9 @@ def get_object(self, object_ids): final_results = self.retrieve_and_deserialize(plain_object_ids, 0) # Construct a dictionary mapping object IDs that we haven't gotten yet # to their original index in the object_ids argument. - unready_ids = dict((plain_object_ids[i].binary(), i) + unready_ids = {plain_object_ids[i].binary(): i for (i, val) in enumerate(final_results) - if val is plasma.ObjectNotAvailable) + if val is plasma.ObjectNotAvailable} was_blocked = (len(unready_ids) > 0) # Try reconstructing any objects we haven't gotten yet. Try to get them # until at least get_timeout_milliseconds milliseconds passes, then diff --git a/test/actor_test.py b/test/actor_test.py index 7e040185b9fc..373e93f8eb91 100644 --- a/test/actor_test.py +++ b/test/actor_test.py @@ -774,7 +774,7 @@ def get_location_and_ids(self): # Make sure that no two actors are assigned to the same GPU. locations_and_ids = ray.get( [actor.get_location_and_ids.remote() for actor in actors]) - node_names = set([location for location, gpu_id in locations_and_ids]) + node_names = {location for location, gpu_id in locations_and_ids} self.assertEqual(len(node_names), num_local_schedulers) location_actor_combinations = [] for node_name in node_names: @@ -815,7 +815,7 @@ def get_location_and_ids(self): # Make sure that no two actors are assigned to the same GPU. locations_and_ids = ray.get( [actor.get_location_and_ids.remote() for actor in actors1]) - node_names = set([location for location, gpu_id in locations_and_ids]) + node_names = {location for location, gpu_id in locations_and_ids} self.assertEqual(len(node_names), num_local_schedulers) # Keep track of which GPU IDs are being used for each location. @@ -849,7 +849,7 @@ def get_location_and_ids(self): [actor.get_location_and_ids.remote() for actor in actors2]) self.assertEqual( node_names, - set([location for location, gpu_id in locations_and_ids])) + {location for location, gpu_id in locations_and_ids}) for location, gpu_ids in locations_and_ids: gpus_in_use[location].extend(gpu_ids) for node_name in node_names: @@ -887,7 +887,7 @@ def get_location_and_ids(self): # Make sure that no two actors are assigned to the same GPU. locations_and_ids = ray.get( [actor.get_location_and_ids.remote() for actor in actors]) - node_names = set([location for location, gpu_id in locations_and_ids]) + node_names = {location for location, gpu_id in locations_and_ids} self.assertEqual(len(node_names), 2) for node_name in node_names: node_gpu_ids = [ @@ -897,7 +897,7 @@ def get_location_and_ids(self): self.assertIn(len(node_gpu_ids), [5, 10]) self.assertEqual( set(node_gpu_ids), - set([(i, ) for i in range(len(node_gpu_ids))])) + {(i, ) for i in range(len(node_gpu_ids))}) # Creating a new actor should fail because all of the GPUs are being # used. @@ -1942,7 +1942,7 @@ def method(self): results = ray.get([result1, result2, result3]) self.assertEqual(results[0], results[2]) - self.assertEqual(set(results), set([0, 1])) + self.assertEqual(set(results), {0, 1}) # Make sure that when one actor goes out of scope a new actor is # created because some resources have been freed up. 
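The remaining rewrite shapes in this patch, as a minimal before/after sketch (illustrative values, not code from this diff):

    # A list of pairs fed to dict() versus a dict comprehension, and a
    # list literal fed to set() versus a set literal; each pair is
    # equivalent, but the literal forms skip the intermediate list.
    pairs = [("a", 1), ("b", 2)]
    old_dict = dict([(k, v * 2) for k, v in pairs])
    new_dict = {k: v * 2 for k, v in pairs}

    old_set = set([1, (1, 2, "hi")])
    new_set = {1, (1, 2, "hi")}

    assert old_dict == new_dict and old_set == new_set

Set literals and dict/set comprehensions landed in Python 2.7 and 3.1, so every interpreter this project supports accepts both spellings.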
diff --git a/test/runtest.py b/test/runtest.py index a44543a21294..1f95250cdf76 100644 --- a/test/runtest.py +++ b/test/runtest.py @@ -255,7 +255,7 @@ def temp(): # Test sets. self.assertEqual(ray.get(f.remote(set())), set()) - s = set([1, (1, 2, "hi")]) + s = {1, (1, 2, "hi")} self.assertEqual(ray.get(f.remote(s)), s) # Test types. @@ -1317,8 +1317,8 @@ def f(): self.assertEqual(list_of_ids, 10 * [[]]) list_of_ids = ray.get([f1.remote() for _ in range(10)]) - set_of_ids = set([tuple(gpu_ids) for gpu_ids in list_of_ids]) - self.assertEqual(set_of_ids, set([(i, ) for i in range(10)])) + set_of_ids = {tuple(gpu_ids) for gpu_ids in list_of_ids} + self.assertEqual(set_of_ids, {(i, ) for i in range(10)}) list_of_ids = ray.get([f2.remote(), f4.remote(), f4.remote()]) all_ids = [gpu_id for gpu_ids in list_of_ids for gpu_id in gpu_ids] diff --git a/test/stress_tests.py b/test/stress_tests.py index 62bf3604e72a..cb30fdbc9b03 100644 --- a/test/stress_tests.py +++ b/test/stress_tests.py @@ -210,8 +210,8 @@ def tearDown(self): state._initialize_global_state(self.redis_ip_address, self.redis_port) if os.environ.get('RAY_USE_NEW_GCS', False): tasks = state.task_table() - local_scheduler_ids = set( - task["LocalSchedulerID"] for task in tasks.values()) + local_scheduler_ids = { + task["LocalSchedulerID"] for task in tasks.values()} # Make sure that all nodes in the cluster were used by checking that # the set of local scheduler IDs that had a task scheduled or submitted From efa5ec85d30ff69f34e5ed93e31343fea7647bcb Mon Sep 17 00:00:00 2001 From: Alok Singh Date: Sat, 28 Apr 2018 01:05:51 -0700 Subject: [PATCH 2/5] Drop unnecessary string format specification No need to specify 0,1.. if paramters are passed in order. --- python/ray/dataframe/dataframe.py | 22 +++++++++++----------- python/ray/dataframe/groupby.py | 4 ++-- python/ray/dataframe/index_metadata.py | 2 +- python/ray/signature.py | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/python/ray/dataframe/dataframe.py b/python/ray/dataframe/dataframe.py index df7da328fee1..cdce331375d4 100644 --- a/python/ray/dataframe/dataframe.py +++ b/python/ray/dataframe/dataframe.py @@ -260,7 +260,7 @@ def __repr__(self): # The split here is so that we don't repr pandas row lengths. result = self._repr_helper_() final_result = repr(result).rsplit("\n\n", maxsplit=1)[0] + \ - "\n\n[{0} rows x {1} columns]".format(len(self.index), + "\n\n[{} rows x {} columns]".format(len(self.index), len(self.columns)) return final_result @@ -279,7 +279,7 @@ def _repr_html_(self): # We split so that we insert our correct dataframe dimensions. result = self._repr_helper_()._repr_html_() return result.split('
<div>')[0] + \
-                '<p>{0} rows × {1} columns</p>\n'.format(len(self.index),
+                '<p>{} rows × {} columns</p>
\n'.format(len(self.index), len(self.columns)) def _get_index(self): @@ -527,7 +527,7 @@ def applymap(self, func): """ if not callable(func): raise ValueError( - "\'{0}\' object is not callable".format(type(func))) + "\'{}\' object is not callable".format(type(func))) new_block_partitions = np.array([ _map_partitions(lambda df: df.applymap(func), block) @@ -1601,7 +1601,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, if isinstance(value, (list, tuple)): raise TypeError('"value" parameter must be a scalar or dict, but ' - 'you passed a "{0}"'.format(type(value).__name__)) + 'you passed a "{}"'.format(type(value).__name__)) if value is None and method is None: raise ValueError('must specify a fill method or value') if value is not None and method is not None: @@ -1875,7 +1875,7 @@ def info_helper(df): index_string = self.index.summary() + '\n' # A column header is needed in the inf() output - col_header = 'Data columns (total {0} columns):\n'.format( + col_header = 'Data columns (total {} columns):\n'.format( len(self.columns)) # Parse the per-partition values to get the per-column details @@ -1884,7 +1884,7 @@ def info_helper(df): col_lines = [prog.match(line) for line in lines] cols = [c.group(0) for c in col_lines if c is not None] # replace the partition columns names with real column names - columns = ["{0}\t{1}\n".format(self.columns[i], + columns = ["{}\t{}\n".format(self.columns[i], cols[i].split(" ", 1)[1]) for i in range(len(cols))] col_string = ''.join(columns) + '\n' @@ -1892,7 +1892,7 @@ def info_helper(df): # A summary of the dtypes in the dataframe dtypes_string = "dtypes: " for dtype, count in self.dtypes.value_counts().iteritems(): - dtypes_string += "{0}({1}),".format(dtype, count) + dtypes_string += "{}({}),".format(dtype, count) dtypes_string = dtypes_string[:-1] + '\n' # Compute the memory usage by summing per-partitions return values @@ -1907,10 +1907,10 @@ def info_helper(df): if len(mem_vals) != 0: # Sum memory usage from each partition if memory_usage != 'deep': - memory_string = 'memory usage: {0}+ bytes'.format( + memory_string = 'memory usage: {}+ bytes'.format( sum(mem_vals)) else: - memory_string = 'memory usage: {0} bytes'.format(sum(mem_vals)) + memory_string = 'memory usage: {} bytes'.format(sum(mem_vals)) # Combine all the components of the info() output result = ''.join([class_string, index_string, col_header, @@ -1939,10 +1939,10 @@ def insert(self, loc, column, value, allow_duplicates=False): "Length of values does not match length of index") if not allow_duplicates and column in self.columns: raise ValueError( - "cannot insert {0}, already exists".format(column)) + "cannot insert {}, already exists".format(column)) if loc > len(self.columns): raise IndexError( - "index {0} is out of bounds for axis 0 with size {1}".format( + "index {} is out of bounds for axis 0 with size {}".format( loc, len(self.columns))) if loc < 0: raise ValueError("unbounded slice") diff --git a/python/ray/dataframe/groupby.py b/python/ray/dataframe/groupby.py index 94c8c474769a..c7a49392c21f 100644 --- a/python/ray/dataframe/groupby.py +++ b/python/ray/dataframe/groupby.py @@ -335,7 +335,7 @@ def take(self, **kwargs): return self._apply_df_function(lambda df: df.take(**kwargs)) def _apply_agg_function(self, f): - assert callable(f), "\'{0}\' object is not callable".format(type(f)) + assert callable(f), "\'{}\' object is not callable".format(type(f)) result = [pd.DataFrame(f(v)).T for k, v in self._iter] @@ -350,7 +350,7 @@ def _apply_agg_function(self, f): 
return new_df def _apply_df_function(self, f): - assert callable(f), "\'{0}\' object is not callable".format(type(f)) + assert callable(f), "\'{}\' object is not callable".format(type(f)) result = [f(v) for k, v in self._iter] diff --git a/python/ray/dataframe/index_metadata.py b/python/ray/dataframe/index_metadata.py index 235809ec7a35..6292c4a6cc93 100644 --- a/python/ray/dataframe/index_metadata.py +++ b/python/ray/dataframe/index_metadata.py @@ -226,7 +226,7 @@ def insert(self, key, loc=None, partition=None, partition = np.digitize(loc, cum_lens[:-1]) if partition >= len(cum_lens): if loc > cum_lens[-1]: - raise IndexError("index {0} is out of bounds".format(loc)) + raise IndexError("index {} is out of bounds".format(loc)) else: index_within_partition = self._lengths[-1] else: diff --git a/python/ray/signature.py b/python/ray/signature.py index c4ae60aa368f..62603d9c71ea 100644 --- a/python/ray/signature.py +++ b/python/ray/signature.py @@ -61,7 +61,7 @@ def func(): for attr in attrs: setattr(func, attr, getattr(original_func, attr)) else: - raise TypeError("{0!r} is not a Python function we can process" + raise TypeError("{!r} is not a Python function we can process" .format(func)) return list(funcsigs.signature(func).parameters.items()) From f9fd9066440ba7b6b0f204d65a622ad7c0ab8dab Mon Sep 17 00:00:00 2001 From: Alok Singh Date: Sun, 29 Apr 2018 22:48:21 -0700 Subject: [PATCH 3/5] Revert "Drop unnecessary string format specification" This reverts commit efa5ec85d30ff69f34e5ed93e31343fea7647bcb. --- python/ray/dataframe/dataframe.py | 22 +++++++++++----------- python/ray/dataframe/groupby.py | 4 ++-- python/ray/dataframe/index_metadata.py | 2 +- python/ray/signature.py | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/python/ray/dataframe/dataframe.py b/python/ray/dataframe/dataframe.py index cdce331375d4..df7da328fee1 100644 --- a/python/ray/dataframe/dataframe.py +++ b/python/ray/dataframe/dataframe.py @@ -260,7 +260,7 @@ def __repr__(self): # The split here is so that we don't repr pandas row lengths. result = self._repr_helper_() final_result = repr(result).rsplit("\n\n", maxsplit=1)[0] + \ - "\n\n[{} rows x {} columns]".format(len(self.index), + "\n\n[{0} rows x {1} columns]".format(len(self.index), len(self.columns)) return final_result @@ -279,7 +279,7 @@ def _repr_html_(self): # We split so that we insert our correct dataframe dimensions. result = self._repr_helper_()._repr_html_() return result.split('
<div>')[0] + \
-                '<p>{} rows × {} columns</p>\n'.format(len(self.index),
+                '<p>{0} rows × {1} columns</p>
\n'.format(len(self.index), len(self.columns)) def _get_index(self): @@ -527,7 +527,7 @@ def applymap(self, func): """ if not callable(func): raise ValueError( - "\'{}\' object is not callable".format(type(func))) + "\'{0}\' object is not callable".format(type(func))) new_block_partitions = np.array([ _map_partitions(lambda df: df.applymap(func), block) @@ -1601,7 +1601,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, if isinstance(value, (list, tuple)): raise TypeError('"value" parameter must be a scalar or dict, but ' - 'you passed a "{}"'.format(type(value).__name__)) + 'you passed a "{0}"'.format(type(value).__name__)) if value is None and method is None: raise ValueError('must specify a fill method or value') if value is not None and method is not None: @@ -1875,7 +1875,7 @@ def info_helper(df): index_string = self.index.summary() + '\n' # A column header is needed in the inf() output - col_header = 'Data columns (total {} columns):\n'.format( + col_header = 'Data columns (total {0} columns):\n'.format( len(self.columns)) # Parse the per-partition values to get the per-column details @@ -1884,7 +1884,7 @@ def info_helper(df): col_lines = [prog.match(line) for line in lines] cols = [c.group(0) for c in col_lines if c is not None] # replace the partition columns names with real column names - columns = ["{}\t{}\n".format(self.columns[i], + columns = ["{0}\t{1}\n".format(self.columns[i], cols[i].split(" ", 1)[1]) for i in range(len(cols))] col_string = ''.join(columns) + '\n' @@ -1892,7 +1892,7 @@ def info_helper(df): # A summary of the dtypes in the dataframe dtypes_string = "dtypes: " for dtype, count in self.dtypes.value_counts().iteritems(): - dtypes_string += "{}({}),".format(dtype, count) + dtypes_string += "{0}({1}),".format(dtype, count) dtypes_string = dtypes_string[:-1] + '\n' # Compute the memory usage by summing per-partitions return values @@ -1907,10 +1907,10 @@ def info_helper(df): if len(mem_vals) != 0: # Sum memory usage from each partition if memory_usage != 'deep': - memory_string = 'memory usage: {}+ bytes'.format( + memory_string = 'memory usage: {0}+ bytes'.format( sum(mem_vals)) else: - memory_string = 'memory usage: {} bytes'.format(sum(mem_vals)) + memory_string = 'memory usage: {0} bytes'.format(sum(mem_vals)) # Combine all the components of the info() output result = ''.join([class_string, index_string, col_header, @@ -1939,10 +1939,10 @@ def insert(self, loc, column, value, allow_duplicates=False): "Length of values does not match length of index") if not allow_duplicates and column in self.columns: raise ValueError( - "cannot insert {}, already exists".format(column)) + "cannot insert {0}, already exists".format(column)) if loc > len(self.columns): raise IndexError( - "index {} is out of bounds for axis 0 with size {}".format( + "index {0} is out of bounds for axis 0 with size {1}".format( loc, len(self.columns))) if loc < 0: raise ValueError("unbounded slice") diff --git a/python/ray/dataframe/groupby.py b/python/ray/dataframe/groupby.py index c7a49392c21f..94c8c474769a 100644 --- a/python/ray/dataframe/groupby.py +++ b/python/ray/dataframe/groupby.py @@ -335,7 +335,7 @@ def take(self, **kwargs): return self._apply_df_function(lambda df: df.take(**kwargs)) def _apply_agg_function(self, f): - assert callable(f), "\'{}\' object is not callable".format(type(f)) + assert callable(f), "\'{0}\' object is not callable".format(type(f)) result = [pd.DataFrame(f(v)).T for k, v in self._iter] @@ -350,7 +350,7 @@ def _apply_agg_function(self, f): 
return new_df def _apply_df_function(self, f): - assert callable(f), "\'{}\' object is not callable".format(type(f)) + assert callable(f), "\'{0}\' object is not callable".format(type(f)) result = [f(v) for k, v in self._iter] diff --git a/python/ray/dataframe/index_metadata.py b/python/ray/dataframe/index_metadata.py index 6292c4a6cc93..235809ec7a35 100644 --- a/python/ray/dataframe/index_metadata.py +++ b/python/ray/dataframe/index_metadata.py @@ -226,7 +226,7 @@ def insert(self, key, loc=None, partition=None, partition = np.digitize(loc, cum_lens[:-1]) if partition >= len(cum_lens): if loc > cum_lens[-1]: - raise IndexError("index {} is out of bounds".format(loc)) + raise IndexError("index {0} is out of bounds".format(loc)) else: index_within_partition = self._lengths[-1] else: diff --git a/python/ray/signature.py b/python/ray/signature.py index 62603d9c71ea..c4ae60aa368f 100644 --- a/python/ray/signature.py +++ b/python/ray/signature.py @@ -61,7 +61,7 @@ def func(): for attr in attrs: setattr(func, attr, getattr(original_func, attr)) else: - raise TypeError("{!r} is not a Python function we can process" + raise TypeError("{0!r} is not a Python function we can process" .format(func)) return list(funcsigs.signature(func).parameters.items()) From 62fbd284794ecc7fe76202294e8c6bef457a802c Mon Sep 17 00:00:00 2001 From: Alok Singh Date: Sun, 29 Apr 2018 22:50:52 -0700 Subject: [PATCH 4/5] Undo changes to cloudpickle Drop use of set literal until cloudpickle uses it. --- python/ray/cloudpickle/cloudpickle.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ray/cloudpickle/cloudpickle.py b/python/ray/cloudpickle/cloudpickle.py index 2dc91bd329ed..e5aab0591f57 100644 --- a/python/ray/cloudpickle/cloudpickle.py +++ b/python/ray/cloudpickle/cloudpickle.py @@ -572,8 +572,8 @@ def extract_code_globals(cls, co): # PyPy "builtin-code" object out_names = set() else: - out_names = {names[oparg] - for op, oparg in _walk_global_ops(co)} + out_names = set(names[oparg] + for op, oparg in _walk_global_ops(co)) # see if nested function have any global refs if co.co_consts: From 0b8f3f6290b14a0241bf4732d8a7899e292f0845 Mon Sep 17 00:00:00 2001 From: Alok Singh Date: Mon, 30 Apr 2018 18:19:40 -0700 Subject: [PATCH 5/5] Reformat code with YAPF We need to set up a git pre-push hook to automatically run this stuff. --- python/ray/worker.py | 8 +++++--- test/actor_test.py | 10 +++++----- test/stress_tests.py | 4 +++- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/python/ray/worker.py b/python/ray/worker.py index 2ed390cd7277..dc246ce8e293 100644 --- a/python/ray/worker.py +++ b/python/ray/worker.py @@ -464,9 +464,11 @@ def get_object(self, object_ids): final_results = self.retrieve_and_deserialize(plain_object_ids, 0) # Construct a dictionary mapping object IDs that we haven't gotten yet # to their original index in the object_ids argument. - unready_ids = {plain_object_ids[i].binary(): i - for (i, val) in enumerate(final_results) - if val is plasma.ObjectNotAvailable} + unready_ids = { + plain_object_ids[i].binary(): i + for (i, val) in enumerate(final_results) + if val is plasma.ObjectNotAvailable + } was_blocked = (len(unready_ids) > 0) # Try reconstructing any objects we haven't gotten yet. 
Try to get them # until at least get_timeout_milliseconds milliseconds passes, then diff --git a/test/actor_test.py b/test/actor_test.py index 373e93f8eb91..6114d2e1feb5 100644 --- a/test/actor_test.py +++ b/test/actor_test.py @@ -847,9 +847,9 @@ def get_location_and_ids(self): # Make sure that no two actors are assigned to the same GPU. locations_and_ids = ray.get( [actor.get_location_and_ids.remote() for actor in actors2]) - self.assertEqual( - node_names, - {location for location, gpu_id in locations_and_ids}) + self.assertEqual(node_names, + {location + for location, gpu_id in locations_and_ids}) for location, gpu_ids in locations_and_ids: gpus_in_use[location].extend(gpu_ids) for node_name in node_names: @@ -896,8 +896,8 @@ def get_location_and_ids(self): ] self.assertIn(len(node_gpu_ids), [5, 10]) self.assertEqual( - set(node_gpu_ids), - {(i, ) for i in range(len(node_gpu_ids))}) + set(node_gpu_ids), {(i, ) + for i in range(len(node_gpu_ids))}) # Creating a new actor should fail because all of the GPUs are being # used. diff --git a/test/stress_tests.py b/test/stress_tests.py index cb30fdbc9b03..490a0149c191 100644 --- a/test/stress_tests.py +++ b/test/stress_tests.py @@ -211,7 +211,9 @@ def tearDown(self): if os.environ.get('RAY_USE_NEW_GCS', False): tasks = state.task_table() local_scheduler_ids = { - task["LocalSchedulerID"] for task in tasks.values()} + task["LocalSchedulerID"] + for task in tasks.values() + } # Make sure that all nodes in the cluster were used by checking that # the set of local scheduler IDs that had a task scheduled or submitted
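The last commit message calls for a git pre-push hook to run YAPF automatically. A minimal sketch of such a hook, assuming YAPF is installed and that a nonzero exit from `yapf --diff` means at least one file would be reformatted (both assumptions should be checked against the repo's setup):

    #!/usr/bin/env python
    # Hypothetical .git/hooks/pre-push: refuse the push when tracked
    # Python files are not YAPF-clean.
    import subprocess
    import sys

    files = subprocess.check_output(
        ["git", "ls-files", "*.py"]).decode().splitlines()
    # `yapf --diff` prints the changes it would make instead of writing
    # them; a nonzero exit status is treated here as "needs formatting".
    if files and subprocess.call(["yapf", "--diff"] + files) != 0:
        sys.exit("pre-push: run yapf --in-place on the files above.")

Marking the script executable (chmod +x .git/hooks/pre-push) is all git needs to start running it before each push.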