From c801000fdb4ac138671d156f67307c7d97f274d1 Mon Sep 17 00:00:00 2001 From: Alok Singh Date: Sat, 28 Apr 2018 01:04:41 -0700 Subject: [PATCH 1/5] Use set/dict literal syntax Ran code through [pyupgrade](https://github.com/asottile/pyupgrade). This is supported in every Python version 2.7+. --- examples/resnet/resnet_main.py | 2 +- python/ray/cloudpickle/cloudpickle.py | 4 ++-- python/ray/common/test/test.py | 2 +- python/ray/dataframe/groupby.py | 2 +- python/ray/experimental/tfutils.py | 2 +- python/ray/plasma/test/test.py | 14 +++++++------- python/ray/tune/test/trial_scheduler_test.py | 16 ++++++++-------- python/ray/tune/trial_runner.py | 2 +- python/ray/worker.py | 4 ++-- test/actor_test.py | 12 ++++++------ test/runtest.py | 6 +++--- test/stress_tests.py | 4 ++-- 12 files changed, 35 insertions(+), 35 deletions(-) diff --git a/examples/resnet/resnet_main.py b/examples/resnet/resnet_main.py index 4b42fbee93cf..5175b6de374b 100644 --- a/examples/resnet/resnet_main.py +++ b/examples/resnet/resnet_main.py @@ -230,7 +230,7 @@ def train(): # testing task with the current weights every 200 steps. acc = ray.get(acc_id) acc_id = test_actor.accuracy.remote(weight_id, step) - print("Step {0}: {1:.6f}".format(step - 200, acc)) + print("Step {}: {:.6f}".format(step - 200, acc)) except KeyboardInterrupt: pass diff --git a/python/ray/cloudpickle/cloudpickle.py b/python/ray/cloudpickle/cloudpickle.py index e5aab0591f57..2dc91bd329ed 100644 --- a/python/ray/cloudpickle/cloudpickle.py +++ b/python/ray/cloudpickle/cloudpickle.py @@ -572,8 +572,8 @@ def extract_code_globals(cls, co): # PyPy "builtin-code" object out_names = set() else: - out_names = set(names[oparg] - for op, oparg in _walk_global_ops(co)) + out_names = {names[oparg] + for op, oparg in _walk_global_ops(co)} # see if nested function have any global refs if co.co_consts: diff --git a/python/ray/common/test/test.py b/python/ray/common/test/test.py index 5892d289fa73..4ae867ff9525 100644 --- a/python/ray/common/test/test.py +++ b/python/ray/common/test/test.py @@ -133,7 +133,7 @@ def test_hashability(self): x = random_object_id() y = random_object_id() {x: y} - set([x, y]) + {x, y} class TestTask(unittest.TestCase): diff --git a/python/ray/dataframe/groupby.py b/python/ray/dataframe/groupby.py index 892bc8f74e19..94c8c474769a 100644 --- a/python/ray/dataframe/groupby.py +++ b/python/ray/dataframe/groupby.py @@ -106,7 +106,7 @@ def tshift(self): @property def groups(self): - return dict([(k, pd.Index(v)) for k, v in self._keys_and_values]) + return {k: pd.Index(v) for k, v in self._keys_and_values} def min(self, **kwargs): return self._apply_agg_function(lambda df: df.min(**kwargs)) diff --git a/python/ray/experimental/tfutils.py b/python/ray/experimental/tfutils.py index 10d5fb4bc308..fc33900b8a27 100644 --- a/python/ray/experimental/tfutils.py +++ b/python/ray/experimental/tfutils.py @@ -49,7 +49,7 @@ def __init__(self, loss, sess=None, input_variables=None): self.sess = sess queue = deque([loss]) variable_names = [] - explored_inputs = set([loss]) + explored_inputs = {loss} # We do a BFS on the dependency graph of the input function to find # the variables. 
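The hunks above and below replace list-building calls with literal syntax. As a standalone sketch of the set-comprehension rewrite pyupgrade performs here (illustrative names, not code from this diff):

    # A generator expression fed to set() versus a set comprehension:
    # both build the same set; the comprehension skips the name lookup
    # and the extra call, and reads as a single construct.
    names = ["a", "b", "a"]
    old_style = set(n.upper() for n in names)
    new_style = {n.upper() for n in names}
    assert old_style == new_style == {"A", "B"}

Note that the cloudpickle copy of this rewrite is reverted later in the series to stay close to upstream cloudpickle.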
diff --git a/python/ray/plasma/test/test.py b/python/ray/plasma/test/test.py index 8b0d62fe1d2a..0ec424d6a71f 100644 --- a/python/ray/plasma/test/test.py +++ b/python/ray/plasma/test/test.py @@ -297,7 +297,7 @@ def test_wait(self): self.client1.seal(obj_id1) ready, waiting = self.client1.wait( [obj_id1], timeout=100, num_returns=1) - self.assertEqual(set(ready), set([obj_id1])) + self.assertEqual(set(ready), {obj_id1}) self.assertEqual(waiting, []) # Test wait if only one object available and only one object waited @@ -307,8 +307,8 @@ def test_wait(self): # Don't seal. ready, waiting = self.client1.wait( [obj_id2, obj_id1], timeout=100, num_returns=1) - self.assertEqual(set(ready), set([obj_id1])) - self.assertEqual(set(waiting), set([obj_id2])) + self.assertEqual(set(ready), {obj_id1}) + self.assertEqual(set(waiting), {obj_id2}) # Test wait if object is sealed later. obj_id3 = random_object_id() @@ -321,14 +321,14 @@ def finish(): t.start() ready, waiting = self.client1.wait( [obj_id3, obj_id2, obj_id1], timeout=1000, num_returns=2) - self.assertEqual(set(ready), set([obj_id1, obj_id3])) - self.assertEqual(set(waiting), set([obj_id2])) + self.assertEqual(set(ready), {obj_id1, obj_id3}) + self.assertEqual(set(waiting), {obj_id2}) # Test if the appropriate number of objects is shown if some objects # are not ready. ready, waiting = self.client1.wait([obj_id3, obj_id2, obj_id1], 100, 3) - self.assertEqual(set(ready), set([obj_id1, obj_id3])) - self.assertEqual(set(waiting), set([obj_id2])) + self.assertEqual(set(ready), {obj_id1, obj_id3}) + self.assertEqual(set(waiting), {obj_id2}) # Don't forget to seal obj_id2. self.client1.seal(obj_id2) diff --git a/python/ray/tune/test/trial_scheduler_test.py b/python/ray/tune/test/trial_scheduler_test.py index b008af3c7d6a..a15448db79c9 100644 --- a/python/ray/tune/test/trial_scheduler_test.py +++ b/python/ray/tune/test/trial_scheduler_test.py @@ -688,36 +688,36 @@ def assertProduces(fn, values): # Categorical case assertProduces( lambda: explore({"v": 4}, {"v": [3, 4, 8, 10]}, 0.0, lambda x: x), - set([3, 8])) + {3, 8}) assertProduces( lambda: explore({"v": 3}, {"v": [3, 4, 8, 10]}, 0.0, lambda x: x), - set([3, 4])) + {3, 4}) assertProduces( lambda: explore({"v": 10}, {"v": [3, 4, 8, 10]}, 0.0, lambda x: x), - set([8, 10])) + {8, 10}) assertProduces( lambda: explore({"v": 7}, {"v": [3, 4, 8, 10]}, 0.0, lambda x: x), - set([3, 4, 8, 10])) + {3, 4, 8, 10}) assertProduces( lambda: explore({"v": 4}, {"v": [3, 4, 8, 10]}, 1.0, lambda x: x), - set([3, 4, 8, 10])) + {3, 4, 8, 10}) # Continuous case assertProduces( lambda: explore( {"v": 100}, {"v": lambda: random.choice([10, 100])}, 0.0, lambda x: x), - set([80, 120])) + {80, 120}) assertProduces( lambda: explore( {"v": 100.0}, {"v": lambda: random.choice([10, 100])}, 0.0, lambda x: x), - set([80.0, 120.0])) + {80.0, 120.0}) assertProduces( lambda: explore( {"v": 100.0}, {"v": lambda: random.choice([10, 100])}, 1.0, lambda x: x), - set([10.0, 100.0])) + {10.0, 100.0}) def testYieldsTimeToOtherTrials(self): pbt, runner = self.basicSetup() diff --git a/python/ray/tune/trial_runner.py b/python/ray/tune/trial_runner.py index 56474c119495..3f07e7bb51ab 100644 --- a/python/ray/tune/trial_runner.py +++ b/python/ray/tune/trial_runner.py @@ -172,7 +172,7 @@ def debug_string(self, max_debug=MAX_DEBUG_TRIALS): if max_debug == start_num: break - for local_dir in sorted(set([t.local_dir for t in self._trials])): + for local_dir in sorted({t.local_dir for t in self._trials}): messages.append("Result logdir: 
{}".format(local_dir)) for state, trials in sorted(states.items()): limit = limit_per_state[state] diff --git a/python/ray/worker.py b/python/ray/worker.py index a12f93a541b1..2ed390cd7277 100644 --- a/python/ray/worker.py +++ b/python/ray/worker.py @@ -464,9 +464,9 @@ def get_object(self, object_ids): final_results = self.retrieve_and_deserialize(plain_object_ids, 0) # Construct a dictionary mapping object IDs that we haven't gotten yet # to their original index in the object_ids argument. - unready_ids = dict((plain_object_ids[i].binary(), i) + unready_ids = {plain_object_ids[i].binary(): i for (i, val) in enumerate(final_results) - if val is plasma.ObjectNotAvailable) + if val is plasma.ObjectNotAvailable} was_blocked = (len(unready_ids) > 0) # Try reconstructing any objects we haven't gotten yet. Try to get them # until at least get_timeout_milliseconds milliseconds passes, then diff --git a/test/actor_test.py b/test/actor_test.py index 7e040185b9fc..373e93f8eb91 100644 --- a/test/actor_test.py +++ b/test/actor_test.py @@ -774,7 +774,7 @@ def get_location_and_ids(self): # Make sure that no two actors are assigned to the same GPU. locations_and_ids = ray.get( [actor.get_location_and_ids.remote() for actor in actors]) - node_names = set([location for location, gpu_id in locations_and_ids]) + node_names = {location for location, gpu_id in locations_and_ids} self.assertEqual(len(node_names), num_local_schedulers) location_actor_combinations = [] for node_name in node_names: @@ -815,7 +815,7 @@ def get_location_and_ids(self): # Make sure that no two actors are assigned to the same GPU. locations_and_ids = ray.get( [actor.get_location_and_ids.remote() for actor in actors1]) - node_names = set([location for location, gpu_id in locations_and_ids]) + node_names = {location for location, gpu_id in locations_and_ids} self.assertEqual(len(node_names), num_local_schedulers) # Keep track of which GPU IDs are being used for each location. @@ -849,7 +849,7 @@ def get_location_and_ids(self): [actor.get_location_and_ids.remote() for actor in actors2]) self.assertEqual( node_names, - set([location for location, gpu_id in locations_and_ids])) + {location for location, gpu_id in locations_and_ids}) for location, gpu_ids in locations_and_ids: gpus_in_use[location].extend(gpu_ids) for node_name in node_names: @@ -887,7 +887,7 @@ def get_location_and_ids(self): # Make sure that no two actors are assigned to the same GPU. locations_and_ids = ray.get( [actor.get_location_and_ids.remote() for actor in actors]) - node_names = set([location for location, gpu_id in locations_and_ids]) + node_names = {location for location, gpu_id in locations_and_ids} self.assertEqual(len(node_names), 2) for node_name in node_names: node_gpu_ids = [ @@ -897,7 +897,7 @@ def get_location_and_ids(self): self.assertIn(len(node_gpu_ids), [5, 10]) self.assertEqual( set(node_gpu_ids), - set([(i, ) for i in range(len(node_gpu_ids))])) + {(i, ) for i in range(len(node_gpu_ids))}) # Creating a new actor should fail because all of the GPUs are being # used. @@ -1942,7 +1942,7 @@ def method(self): results = ray.get([result1, result2, result3]) self.assertEqual(results[0], results[2]) - self.assertEqual(set(results), set([0, 1])) + self.assertEqual(set(results), {0, 1}) # Make sure that when one actor goes out of scope a new actor is # created because some resources have been freed up. 
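The remaining rewrite shapes in this patch, as a minimal before/after sketch (illustrative values, not code from this diff):

    # A list of pairs fed to dict() versus a dict comprehension, and a
    # list literal fed to set() versus a set literal; each pair is
    # equivalent, but the literal forms skip the intermediate list.
    pairs = [("a", 1), ("b", 2)]
    old_dict = dict([(k, v * 2) for k, v in pairs])
    new_dict = {k: v * 2 for k, v in pairs}

    old_set = set([1, (1, 2, "hi")])
    new_set = {1, (1, 2, "hi")}

    assert old_dict == new_dict and old_set == new_set

Set literals and dict/set comprehensions landed in Python 2.7 and 3.1, so every interpreter this project supports accepts both spellings.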
diff --git a/test/runtest.py b/test/runtest.py index a44543a21294..1f95250cdf76 100644 --- a/test/runtest.py +++ b/test/runtest.py @@ -255,7 +255,7 @@ def temp(): # Test sets. self.assertEqual(ray.get(f.remote(set())), set()) - s = set([1, (1, 2, "hi")]) + s = {1, (1, 2, "hi")} self.assertEqual(ray.get(f.remote(s)), s) # Test types. @@ -1317,8 +1317,8 @@ def f(): self.assertEqual(list_of_ids, 10 * [[]]) list_of_ids = ray.get([f1.remote() for _ in range(10)]) - set_of_ids = set([tuple(gpu_ids) for gpu_ids in list_of_ids]) - self.assertEqual(set_of_ids, set([(i, ) for i in range(10)])) + set_of_ids = {tuple(gpu_ids) for gpu_ids in list_of_ids} + self.assertEqual(set_of_ids, {(i, ) for i in range(10)}) list_of_ids = ray.get([f2.remote(), f4.remote(), f4.remote()]) all_ids = [gpu_id for gpu_ids in list_of_ids for gpu_id in gpu_ids] diff --git a/test/stress_tests.py b/test/stress_tests.py index 62bf3604e72a..cb30fdbc9b03 100644 --- a/test/stress_tests.py +++ b/test/stress_tests.py @@ -210,8 +210,8 @@ def tearDown(self): state._initialize_global_state(self.redis_ip_address, self.redis_port) if os.environ.get('RAY_USE_NEW_GCS', False): tasks = state.task_table() - local_scheduler_ids = set( - task["LocalSchedulerID"] for task in tasks.values()) + local_scheduler_ids = { + task["LocalSchedulerID"] for task in tasks.values()} # Make sure that all nodes in the cluster were used by checking that # the set of local scheduler IDs that had a task scheduled or submitted From efa5ec85d30ff69f34e5ed93e31343fea7647bcb Mon Sep 17 00:00:00 2001 From: Alok Singh Date: Sat, 28 Apr 2018 01:05:51 -0700 Subject: [PATCH 2/5] Drop unnecessary string format specification No need to specify 0,1.. if paramters are passed in order. --- python/ray/dataframe/dataframe.py | 22 +++++++++++----------- python/ray/dataframe/groupby.py | 4 ++-- python/ray/dataframe/index_metadata.py | 2 +- python/ray/signature.py | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/python/ray/dataframe/dataframe.py b/python/ray/dataframe/dataframe.py index df7da328fee1..cdce331375d4 100644 --- a/python/ray/dataframe/dataframe.py +++ b/python/ray/dataframe/dataframe.py @@ -260,7 +260,7 @@ def __repr__(self): # The split here is so that we don't repr pandas row lengths. result = self._repr_helper_() final_result = repr(result).rsplit("\n\n", maxsplit=1)[0] + \ - "\n\n[{0} rows x {1} columns]".format(len(self.index), + "\n\n[{} rows x {} columns]".format(len(self.index), len(self.columns)) return final_result @@ -279,7 +279,7 @@ def _repr_html_(self): # We split so that we insert our correct dataframe dimensions. result = self._repr_helper_()._repr_html_() return result.split('
<div>')[0] + \
-                '<p>{0} rows × {1} columns</p>\n'.format(len(self.index),
+                '<p>{} rows × {} columns</p>
\n'.format(len(self.index), len(self.columns)) def _get_index(self): @@ -527,7 +527,7 @@ def applymap(self, func): """ if not callable(func): raise ValueError( - "\'{0}\' object is not callable".format(type(func))) + "\'{}\' object is not callable".format(type(func))) new_block_partitions = np.array([ _map_partitions(lambda df: df.applymap(func), block) @@ -1601,7 +1601,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, if isinstance(value, (list, tuple)): raise TypeError('"value" parameter must be a scalar or dict, but ' - 'you passed a "{0}"'.format(type(value).__name__)) + 'you passed a "{}"'.format(type(value).__name__)) if value is None and method is None: raise ValueError('must specify a fill method or value') if value is not None and method is not None: @@ -1875,7 +1875,7 @@ def info_helper(df): index_string = self.index.summary() + '\n' # A column header is needed in the inf() output - col_header = 'Data columns (total {0} columns):\n'.format( + col_header = 'Data columns (total {} columns):\n'.format( len(self.columns)) # Parse the per-partition values to get the per-column details @@ -1884,7 +1884,7 @@ def info_helper(df): col_lines = [prog.match(line) for line in lines] cols = [c.group(0) for c in col_lines if c is not None] # replace the partition columns names with real column names - columns = ["{0}\t{1}\n".format(self.columns[i], + columns = ["{}\t{}\n".format(self.columns[i], cols[i].split(" ", 1)[1]) for i in range(len(cols))] col_string = ''.join(columns) + '\n' @@ -1892,7 +1892,7 @@ def info_helper(df): # A summary of the dtypes in the dataframe dtypes_string = "dtypes: " for dtype, count in self.dtypes.value_counts().iteritems(): - dtypes_string += "{0}({1}),".format(dtype, count) + dtypes_string += "{}({}),".format(dtype, count) dtypes_string = dtypes_string[:-1] + '\n' # Compute the memory usage by summing per-partitions return values @@ -1907,10 +1907,10 @@ def info_helper(df): if len(mem_vals) != 0: # Sum memory usage from each partition if memory_usage != 'deep': - memory_string = 'memory usage: {0}+ bytes'.format( + memory_string = 'memory usage: {}+ bytes'.format( sum(mem_vals)) else: - memory_string = 'memory usage: {0} bytes'.format(sum(mem_vals)) + memory_string = 'memory usage: {} bytes'.format(sum(mem_vals)) # Combine all the components of the info() output result = ''.join([class_string, index_string, col_header, @@ -1939,10 +1939,10 @@ def insert(self, loc, column, value, allow_duplicates=False): "Length of values does not match length of index") if not allow_duplicates and column in self.columns: raise ValueError( - "cannot insert {0}, already exists".format(column)) + "cannot insert {}, already exists".format(column)) if loc > len(self.columns): raise IndexError( - "index {0} is out of bounds for axis 0 with size {1}".format( + "index {} is out of bounds for axis 0 with size {}".format( loc, len(self.columns))) if loc < 0: raise ValueError("unbounded slice") diff --git a/python/ray/dataframe/groupby.py b/python/ray/dataframe/groupby.py index 94c8c474769a..c7a49392c21f 100644 --- a/python/ray/dataframe/groupby.py +++ b/python/ray/dataframe/groupby.py @@ -335,7 +335,7 @@ def take(self, **kwargs): return self._apply_df_function(lambda df: df.take(**kwargs)) def _apply_agg_function(self, f): - assert callable(f), "\'{0}\' object is not callable".format(type(f)) + assert callable(f), "\'{}\' object is not callable".format(type(f)) result = [pd.DataFrame(f(v)).T for k, v in self._iter] @@ -350,7 +350,7 @@ def _apply_agg_function(self, f): 
return new_df def _apply_df_function(self, f): - assert callable(f), "\'{0}\' object is not callable".format(type(f)) + assert callable(f), "\'{}\' object is not callable".format(type(f)) result = [f(v) for k, v in self._iter] diff --git a/python/ray/dataframe/index_metadata.py b/python/ray/dataframe/index_metadata.py index 235809ec7a35..6292c4a6cc93 100644 --- a/python/ray/dataframe/index_metadata.py +++ b/python/ray/dataframe/index_metadata.py @@ -226,7 +226,7 @@ def insert(self, key, loc=None, partition=None, partition = np.digitize(loc, cum_lens[:-1]) if partition >= len(cum_lens): if loc > cum_lens[-1]: - raise IndexError("index {0} is out of bounds".format(loc)) + raise IndexError("index {} is out of bounds".format(loc)) else: index_within_partition = self._lengths[-1] else: diff --git a/python/ray/signature.py b/python/ray/signature.py index c4ae60aa368f..62603d9c71ea 100644 --- a/python/ray/signature.py +++ b/python/ray/signature.py @@ -61,7 +61,7 @@ def func(): for attr in attrs: setattr(func, attr, getattr(original_func, attr)) else: - raise TypeError("{0!r} is not a Python function we can process" + raise TypeError("{!r} is not a Python function we can process" .format(func)) return list(funcsigs.signature(func).parameters.items()) From f9fd9066440ba7b6b0f204d65a622ad7c0ab8dab Mon Sep 17 00:00:00 2001 From: Alok Singh Date: Sun, 29 Apr 2018 22:48:21 -0700 Subject: [PATCH 3/5] Revert "Drop unnecessary string format specification" This reverts commit efa5ec85d30ff69f34e5ed93e31343fea7647bcb. --- python/ray/dataframe/dataframe.py | 22 +++++++++++----------- python/ray/dataframe/groupby.py | 4 ++-- python/ray/dataframe/index_metadata.py | 2 +- python/ray/signature.py | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/python/ray/dataframe/dataframe.py b/python/ray/dataframe/dataframe.py index cdce331375d4..df7da328fee1 100644 --- a/python/ray/dataframe/dataframe.py +++ b/python/ray/dataframe/dataframe.py @@ -260,7 +260,7 @@ def __repr__(self): # The split here is so that we don't repr pandas row lengths. result = self._repr_helper_() final_result = repr(result).rsplit("\n\n", maxsplit=1)[0] + \ - "\n\n[{} rows x {} columns]".format(len(self.index), + "\n\n[{0} rows x {1} columns]".format(len(self.index), len(self.columns)) return final_result @@ -279,7 +279,7 @@ def _repr_html_(self): # We split so that we insert our correct dataframe dimensions. result = self._repr_helper_()._repr_html_() return result.split('
<div>')[0] + \
-                '<p>{} rows × {} columns</p>\n'.format(len(self.index),
+                '<p>{0} rows × {1} columns</p>
\n'.format(len(self.index), len(self.columns)) def _get_index(self): @@ -527,7 +527,7 @@ def applymap(self, func): """ if not callable(func): raise ValueError( - "\'{}\' object is not callable".format(type(func))) + "\'{0}\' object is not callable".format(type(func))) new_block_partitions = np.array([ _map_partitions(lambda df: df.applymap(func), block) @@ -1601,7 +1601,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, if isinstance(value, (list, tuple)): raise TypeError('"value" parameter must be a scalar or dict, but ' - 'you passed a "{}"'.format(type(value).__name__)) + 'you passed a "{0}"'.format(type(value).__name__)) if value is None and method is None: raise ValueError('must specify a fill method or value') if value is not None and method is not None: @@ -1875,7 +1875,7 @@ def info_helper(df): index_string = self.index.summary() + '\n' # A column header is needed in the inf() output - col_header = 'Data columns (total {} columns):\n'.format( + col_header = 'Data columns (total {0} columns):\n'.format( len(self.columns)) # Parse the per-partition values to get the per-column details @@ -1884,7 +1884,7 @@ def info_helper(df): col_lines = [prog.match(line) for line in lines] cols = [c.group(0) for c in col_lines if c is not None] # replace the partition columns names with real column names - columns = ["{}\t{}\n".format(self.columns[i], + columns = ["{0}\t{1}\n".format(self.columns[i], cols[i].split(" ", 1)[1]) for i in range(len(cols))] col_string = ''.join(columns) + '\n' @@ -1892,7 +1892,7 @@ def info_helper(df): # A summary of the dtypes in the dataframe dtypes_string = "dtypes: " for dtype, count in self.dtypes.value_counts().iteritems(): - dtypes_string += "{}({}),".format(dtype, count) + dtypes_string += "{0}({1}),".format(dtype, count) dtypes_string = dtypes_string[:-1] + '\n' # Compute the memory usage by summing per-partitions return values @@ -1907,10 +1907,10 @@ def info_helper(df): if len(mem_vals) != 0: # Sum memory usage from each partition if memory_usage != 'deep': - memory_string = 'memory usage: {}+ bytes'.format( + memory_string = 'memory usage: {0}+ bytes'.format( sum(mem_vals)) else: - memory_string = 'memory usage: {} bytes'.format(sum(mem_vals)) + memory_string = 'memory usage: {0} bytes'.format(sum(mem_vals)) # Combine all the components of the info() output result = ''.join([class_string, index_string, col_header, @@ -1939,10 +1939,10 @@ def insert(self, loc, column, value, allow_duplicates=False): "Length of values does not match length of index") if not allow_duplicates and column in self.columns: raise ValueError( - "cannot insert {}, already exists".format(column)) + "cannot insert {0}, already exists".format(column)) if loc > len(self.columns): raise IndexError( - "index {} is out of bounds for axis 0 with size {}".format( + "index {0} is out of bounds for axis 0 with size {1}".format( loc, len(self.columns))) if loc < 0: raise ValueError("unbounded slice") diff --git a/python/ray/dataframe/groupby.py b/python/ray/dataframe/groupby.py index c7a49392c21f..94c8c474769a 100644 --- a/python/ray/dataframe/groupby.py +++ b/python/ray/dataframe/groupby.py @@ -335,7 +335,7 @@ def take(self, **kwargs): return self._apply_df_function(lambda df: df.take(**kwargs)) def _apply_agg_function(self, f): - assert callable(f), "\'{}\' object is not callable".format(type(f)) + assert callable(f), "\'{0}\' object is not callable".format(type(f)) result = [pd.DataFrame(f(v)).T for k, v in self._iter] @@ -350,7 +350,7 @@ def _apply_agg_function(self, f): 
return new_df def _apply_df_function(self, f): - assert callable(f), "\'{}\' object is not callable".format(type(f)) + assert callable(f), "\'{0}\' object is not callable".format(type(f)) result = [f(v) for k, v in self._iter] diff --git a/python/ray/dataframe/index_metadata.py b/python/ray/dataframe/index_metadata.py index 6292c4a6cc93..235809ec7a35 100644 --- a/python/ray/dataframe/index_metadata.py +++ b/python/ray/dataframe/index_metadata.py @@ -226,7 +226,7 @@ def insert(self, key, loc=None, partition=None, partition = np.digitize(loc, cum_lens[:-1]) if partition >= len(cum_lens): if loc > cum_lens[-1]: - raise IndexError("index {} is out of bounds".format(loc)) + raise IndexError("index {0} is out of bounds".format(loc)) else: index_within_partition = self._lengths[-1] else: diff --git a/python/ray/signature.py b/python/ray/signature.py index 62603d9c71ea..c4ae60aa368f 100644 --- a/python/ray/signature.py +++ b/python/ray/signature.py @@ -61,7 +61,7 @@ def func(): for attr in attrs: setattr(func, attr, getattr(original_func, attr)) else: - raise TypeError("{!r} is not a Python function we can process" + raise TypeError("{0!r} is not a Python function we can process" .format(func)) return list(funcsigs.signature(func).parameters.items()) From 62fbd284794ecc7fe76202294e8c6bef457a802c Mon Sep 17 00:00:00 2001 From: Alok Singh Date: Sun, 29 Apr 2018 22:50:52 -0700 Subject: [PATCH 4/5] Undo changes to cloudpickle Drop use of set literal until cloudpickle uses it. --- python/ray/cloudpickle/cloudpickle.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ray/cloudpickle/cloudpickle.py b/python/ray/cloudpickle/cloudpickle.py index 2dc91bd329ed..e5aab0591f57 100644 --- a/python/ray/cloudpickle/cloudpickle.py +++ b/python/ray/cloudpickle/cloudpickle.py @@ -572,8 +572,8 @@ def extract_code_globals(cls, co): # PyPy "builtin-code" object out_names = set() else: - out_names = {names[oparg] - for op, oparg in _walk_global_ops(co)} + out_names = set(names[oparg] + for op, oparg in _walk_global_ops(co)) # see if nested function have any global refs if co.co_consts: From 0b8f3f6290b14a0241bf4732d8a7899e292f0845 Mon Sep 17 00:00:00 2001 From: Alok Singh Date: Mon, 30 Apr 2018 18:19:40 -0700 Subject: [PATCH 5/5] Reformat code with YAPF We need to set up a git pre-push hook to automatically run this stuff. --- python/ray/worker.py | 8 +++++--- test/actor_test.py | 10 +++++----- test/stress_tests.py | 4 +++- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/python/ray/worker.py b/python/ray/worker.py index 2ed390cd7277..dc246ce8e293 100644 --- a/python/ray/worker.py +++ b/python/ray/worker.py @@ -464,9 +464,11 @@ def get_object(self, object_ids): final_results = self.retrieve_and_deserialize(plain_object_ids, 0) # Construct a dictionary mapping object IDs that we haven't gotten yet # to their original index in the object_ids argument. - unready_ids = {plain_object_ids[i].binary(): i - for (i, val) in enumerate(final_results) - if val is plasma.ObjectNotAvailable} + unready_ids = { + plain_object_ids[i].binary(): i + for (i, val) in enumerate(final_results) + if val is plasma.ObjectNotAvailable + } was_blocked = (len(unready_ids) > 0) # Try reconstructing any objects we haven't gotten yet. 
Try to get them # until at least get_timeout_milliseconds milliseconds passes, then diff --git a/test/actor_test.py b/test/actor_test.py index 373e93f8eb91..6114d2e1feb5 100644 --- a/test/actor_test.py +++ b/test/actor_test.py @@ -847,9 +847,9 @@ def get_location_and_ids(self): # Make sure that no two actors are assigned to the same GPU. locations_and_ids = ray.get( [actor.get_location_and_ids.remote() for actor in actors2]) - self.assertEqual( - node_names, - {location for location, gpu_id in locations_and_ids}) + self.assertEqual(node_names, + {location + for location, gpu_id in locations_and_ids}) for location, gpu_ids in locations_and_ids: gpus_in_use[location].extend(gpu_ids) for node_name in node_names: @@ -896,8 +896,8 @@ def get_location_and_ids(self): ] self.assertIn(len(node_gpu_ids), [5, 10]) self.assertEqual( - set(node_gpu_ids), - {(i, ) for i in range(len(node_gpu_ids))}) + set(node_gpu_ids), {(i, ) + for i in range(len(node_gpu_ids))}) # Creating a new actor should fail because all of the GPUs are being # used. diff --git a/test/stress_tests.py b/test/stress_tests.py index cb30fdbc9b03..490a0149c191 100644 --- a/test/stress_tests.py +++ b/test/stress_tests.py @@ -211,7 +211,9 @@ def tearDown(self): if os.environ.get('RAY_USE_NEW_GCS', False): tasks = state.task_table() local_scheduler_ids = { - task["LocalSchedulerID"] for task in tasks.values()} + task["LocalSchedulerID"] + for task in tasks.values() + } # Make sure that all nodes in the cluster were used by checking that # the set of local scheduler IDs that had a task scheduled or submitted
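The last commit message calls for a git pre-push hook to run YAPF automatically. A minimal sketch of such a hook, assuming YAPF is installed and that a nonzero exit from `yapf --diff` means at least one file would be reformatted (both assumptions should be checked against the repo's setup):

    #!/usr/bin/env python
    # Hypothetical .git/hooks/pre-push: refuse the push when tracked
    # Python files are not YAPF-clean.
    import subprocess
    import sys

    files = subprocess.check_output(
        ["git", "ls-files", "*.py"]).decode().splitlines()
    # `yapf --diff` prints the changes it would make instead of writing
    # them; a nonzero exit status is treated here as "needs formatting".
    if files and subprocess.call(["yapf", "--diff"] + files) != 0:
        sys.exit("pre-push: run yapf --in-place on the files above.")

Marking the script executable (chmod +x .git/hooks/pre-push) is all git needs to start running it before each push.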