diff --git a/doc/source/rllib-env.rst b/doc/source/rllib-env.rst index 056b7c3fc791..cdf305d211f5 100644 --- a/doc/source/rllib-env.rst +++ b/doc/source/rllib-env.rst @@ -92,7 +92,7 @@ In the above example, note that the ``env_creator`` function takes in an ``env_c OpenAI Gym ---------- -RLlib uses Gym as its environment interface for single-agent training. For more information on how to implement a custom Gym environment, see the `gym.Env class definition `__. You may also find the `SimpleCorridor `__ and `Carla simulator `__ example env implementations useful as a reference. +RLlib uses Gym as its environment interface for single-agent training. For more information on how to implement a custom Gym environment, see the `gym.Env class definition `__. You may find the `SimpleCorridor `__ example useful as a reference. Performance ~~~~~~~~~~~ diff --git a/doc/source/rllib-models.rst b/doc/source/rllib-models.rst index 7fd860a65a3e..b429e04be417 100644 --- a/doc/source/rllib-models.rst +++ b/doc/source/rllib-models.rst @@ -134,7 +134,7 @@ Custom TF models should subclass the common RLlib `model class `__ and associated `training scripts `__. You can also reference the `unit tests `__ for Tuple and Dict spaces, which show how to access nested observation fields. +For a full example of a custom model in code, see the `custom env example `__. You can also reference the `unit tests `__ for Tuple and Dict spaces, which show how to access nested observation fields. Custom Recurrent Models ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py b/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py index 6c4917ad853f..675f9187f2c6 100644 --- a/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py +++ b/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py @@ -399,8 +399,6 @@ def set_state(self, state): self.set_pure_exploration_phase(state[2]) def _build_q_network(self, obs, obs_space, action_space, actions): - import tensorflow.contrib.layers as layers - if self.config["use_state_preprocessor"]: q_model = ModelCatalog.get_model({ "obs": obs, @@ -413,16 +411,12 @@ def _build_q_network(self, obs, obs_space, action_space, actions): activation = getattr(tf.nn, self.config["critic_hidden_activation"]) for hidden in self.config["critic_hiddens"]: - q_out = layers.fully_connected( - q_out, num_outputs=hidden, activation_fn=activation) - q_values = layers.fully_connected( - q_out, num_outputs=1, activation_fn=None) + q_out = tf.layers.dense(q_out, units=hidden, activation=activation) + q_values = tf.layers.dense(q_out, units=1, activation=None) return q_values, q_model def _build_policy_network(self, obs, obs_space, action_space): - import tensorflow.contrib.layers as layers - if self.config["use_state_preprocessor"]: model = ModelCatalog.get_model({ "obs": obs, @@ -434,16 +428,19 @@ def _build_policy_network(self, obs, obs_space, action_space): action_out = obs activation = getattr(tf.nn, self.config["actor_hidden_activation"]) - normalizer_fn = layers.layer_norm if self.config["parameter_noise"] \ - else None for hidden in self.config["actor_hiddens"]: - action_out = layers.fully_connected( - action_out, - num_outputs=hidden, - activation_fn=activation, - normalizer_fn=normalizer_fn) - action_out = layers.fully_connected( - action_out, num_outputs=self.dim_actions, activation_fn=None) + if self.config["parameter_noise"]: + import tensorflow.contrib.layers as layers + action_out = layers.fully_connected( + action_out, + num_outputs=hidden, + activation_fn=activation, + 
normalizer_fn=layers.layer_norm) + else: + action_out = tf.layers.dense( + action_out, units=hidden, activation=activation) + action_out = tf.layers.dense( + action_out, units=self.dim_actions, activation=None) # Use sigmoid to scale to [0,1], but also double magnitude of input to # emulate behaviour of tanh activation used in DDPG and TD3 papers. @@ -507,7 +504,7 @@ def make_noisy_actions(): def make_uniform_random_actions(): # pure random exploration option - uniform_random_actions = tf.random.uniform( + uniform_random_actions = tf.random_uniform( tf.shape(deterministic_actions)) # rescale uniform random actions according to action range tf_range = tf.constant(action_range[None], dtype="float32") diff --git a/python/ray/rllib/agents/dqn/dqn_policy_graph.py b/python/ray/rllib/agents/dqn/dqn_policy_graph.py index 5af38ed9e958..1e682ce80cfa 100644 --- a/python/ray/rllib/agents/dqn/dqn_policy_graph.py +++ b/python/ray/rllib/agents/dqn/dqn_policy_graph.py @@ -154,8 +154,6 @@ def __init__(self, v_max=10.0, sigma0=0.5, parameter_noise=False): - import tensorflow.contrib.layers as layers - self.model = model with tf.variable_scope("action_value"): if hiddens: @@ -164,13 +162,18 @@ def __init__(self, if use_noisy: action_out = self.noisy_layer( "hidden_%d" % i, action_out, hiddens[i], sigma0) - else: + elif parameter_noise: + import tensorflow.contrib.layers as layers action_out = layers.fully_connected( action_out, num_outputs=hiddens[i], activation_fn=tf.nn.relu, - normalizer_fn=layers.layer_norm - if parameter_noise else None) + normalizer_fn=layers.layer_norm) + else: + action_out = tf.layers.dense( + action_out, + units=hiddens[i], + activation=tf.nn.relu) else: # Avoid postprocessing the outputs. This enables custom models # to be used for parametric action DQN. 
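The `parameter_noise` branches above keep `tf.contrib` because contrib's `fully_connected` applies `normalizer_fn` in place of the bias, between the linear transform and the activation, which a single `tf.layers.dense` call cannot express. A rough equivalence, as a sketch rather than code from this patch (the helper name is hypothetical):

    def dense_with_layer_norm(x, units, activation=tf.nn.relu):
        # Sketch of layers.fully_connected(x, units, activation_fn=activation,
        #                                  normalizer_fn=layers.layer_norm):
        # a linear transform without bias (the normalizer re-centers instead),
        y = tf.layers.dense(x, units, activation=None, use_bias=False)
        # layer normalization applied to the pre-activation output,
        y = tf.contrib.layers.layer_norm(y)
        # and only then the nonlinearity.
        return activation(y)
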
@@ -183,10 +186,8 @@ def __init__(self, sigma0, non_linear=False) elif hiddens: - action_scores = layers.fully_connected( - action_out, - num_outputs=num_actions * num_atoms, - activation_fn=None) + action_scores = tf.layers.dense( + action_out, units=num_actions * num_atoms, activation=None) else: action_scores = model.outputs if num_atoms > 1: @@ -214,13 +215,15 @@ def __init__(self, state_out = self.noisy_layer("dueling_hidden_%d" % i, state_out, hiddens[i], sigma0) - else: - state_out = layers.fully_connected( + elif parameter_noise: + state_out = tf.contrib.layers.fully_connected( state_out, num_outputs=hiddens[i], activation_fn=tf.nn.relu, - normalizer_fn=layers.layer_norm - if parameter_noise else None) + normalizer_fn=tf.contrib.layers.layer_norm) + else: + state_out = tf.layers.dense( + state_out, units=hiddens[i], activation=tf.nn.relu) if use_noisy: state_score = self.noisy_layer( "dueling_output", @@ -229,8 +232,8 @@ def __init__(self, sigma0, non_linear=False) else: - state_score = layers.fully_connected( - state_out, num_outputs=num_atoms, activation_fn=None) + state_score = tf.layers.dense( + state_out, units=num_atoms, activation=None) if num_atoms > 1: support_logits_per_action_mean = tf.reduce_mean( support_logits_per_action, 1) diff --git a/python/ray/rllib/agents/impala/vtrace.py b/python/ray/rllib/agents/impala/vtrace.py index cc560d9937e4..9e164cc66dd1 100644 --- a/python/ray/rllib/agents/impala/vtrace.py +++ b/python/ray/rllib/agents/impala/vtrace.py @@ -37,8 +37,6 @@ from ray.rllib.utils import try_import_tf tf = try_import_tf() -if tf: - nest = tf.contrib.framework.nest VTraceFromLogitsReturns = collections.namedtuple("VTraceFromLogitsReturns", [ "vs", "pg_advantages", "log_rhos", "behaviour_action_log_probs", diff --git a/python/ray/rllib/agents/impala/vtrace_policy_graph.py b/python/ray/rllib/agents/impala/vtrace_policy_graph.py index 702aefb50a6e..b74cb7fe5ab0 100644 --- a/python/ray/rllib/agents/impala/vtrace_policy_graph.py +++ b/python/ray/rllib/agents/impala/vtrace_policy_graph.py @@ -279,14 +279,11 @@ def make_time_major(tensor, drop_last=False): self.KL_stats.update({ "mean_KL_{}".format(i): tf.reduce_mean(kl), "max_KL_{}".format(i): tf.reduce_max(kl), - "median_KL_{}".format(i): tf.contrib.distributions. - percentile(kl, 50.0), }) else: self.KL_stats = { "mean_KL": tf.reduce_mean(kls[0]), "max_KL": tf.reduce_max(kls[0]), - "median_KL": tf.contrib.distributions.percentile(kls[0], 50.0), } # Initialize TFPolicyGraph diff --git a/python/ray/rllib/agents/impala/vtrace_test.py b/python/ray/rllib/agents/impala/vtrace_test.py index 145ed4e7a2cd..e1f39991b097 100644 --- a/python/ray/rllib/agents/impala/vtrace_test.py +++ b/python/ray/rllib/agents/impala/vtrace_test.py @@ -26,8 +26,10 @@ from absl.testing import parameterized import numpy as np -import tensorflow as tf import vtrace +from ray.rllib.utils import try_import_tf + +tf = try_import_tf() def _shaped_arange(*shape): diff --git a/python/ray/rllib/agents/ppo/appo_policy_graph.py b/python/ray/rllib/agents/ppo/appo_policy_graph.py index 64523c60d1b3..c4b4caf29ae4 100644 --- a/python/ray/rllib/agents/ppo/appo_policy_graph.py +++ b/python/ray/rllib/agents/ppo/appo_policy_graph.py @@ -400,14 +400,11 @@ def make_time_major(tensor, drop_last=False): self.KL_stats.update({ "mean_KL_{}".format(i): tf.reduce_mean(kl), "max_KL_{}".format(i): tf.reduce_max(kl), - "median_KL_{}".format(i): tf.contrib.distributions. 
- percentile(kl, 50.0), }) else: self.KL_stats = { "mean_KL": tf.reduce_mean(kls[0]), "max_KL": tf.reduce_max(kls[0]), - "median_KL": tf.contrib.distributions.percentile(kls[0], 50.0), } # Initialize TFPolicyGraph diff --git a/python/ray/rllib/agents/ppo/test/test.py b/python/ray/rllib/agents/ppo/test/test.py index 432b22f9aed2..1091b639c6f4 100644 --- a/python/ray/rllib/agents/ppo/test/test.py +++ b/python/ray/rllib/agents/ppo/test/test.py @@ -4,11 +4,13 @@ import unittest import numpy as np -import tensorflow as tf from numpy.testing import assert_allclose from ray.rllib.models.action_dist import Categorical from ray.rllib.agents.ppo.utils import flatten, concatenate +from ray.rllib.utils import try_import_tf + +tf = try_import_tf() # TODO(ekl): move to rllib/models dir diff --git a/python/ray/rllib/examples/batch_norm_model.py b/python/ray/rllib/examples/batch_norm_model.py index 7852a62c2c24..c8a3fc83c0e4 100644 --- a/python/ray/rllib/examples/batch_norm_model.py +++ b/python/ray/rllib/examples/batch_norm_model.py @@ -5,13 +5,13 @@ import argparse -import tensorflow as tf -import tensorflow.contrib.slim as slim - import ray from ray import tune from ray.rllib.models import Model, ModelCatalog from ray.rllib.models.misc import normc_initializer +from ray.rllib.utils import try_import_tf + +tf = try_import_tf() parser = argparse.ArgumentParser() parser.add_argument("--num-iters", type=int, default=200) @@ -24,21 +24,21 @@ def _build_layers_v2(self, input_dict, num_outputs, options): hiddens = [256, 256] for i, size in enumerate(hiddens): label = "fc{}".format(i) - last_layer = slim.fully_connected( + last_layer = tf.layers.dense( last_layer, size, - weights_initializer=normc_initializer(1.0), - activation_fn=tf.nn.tanh, - scope=label) + kernel_initializer=normc_initializer(1.0), + activation=tf.nn.tanh, + name=label) # Add a batch norm layer last_layer = tf.layers.batch_normalization( last_layer, training=input_dict["is_training"]) - output = slim.fully_connected( + output = tf.layers.dense( last_layer, num_outputs, - weights_initializer=normc_initializer(0.01), - activation_fn=None, - scope="fc_out") + kernel_initializer=normc_initializer(0.01), + activation=None, + name="fc_out") return output, last_layer diff --git a/python/ray/rllib/examples/carla/README b/python/ray/rllib/examples/carla/README deleted file mode 100644 index a066b048a2a1..000000000000 --- a/python/ray/rllib/examples/carla/README +++ /dev/null @@ -1,14 +0,0 @@ -(Experimental) OpenAI gym environment for https://github.com/carla-simulator/carla - -To run, first download and unpack the Carla binaries from this URL: https://github.com/carla-simulator/carla/releases/tag/0.7.0 - -Note that currently you also need to clone the Python code from `carla/benchmark_branch` which includes the Carla planner. - -Then, you can try running env.py to drive the car. Run one of the train_* scripts to attempt training. - - $ pkill -9 Carla - $ export CARLA_SERVER=/PATH/TO/CARLA_0.7.0/CarlaUE4.sh - $ export CARLA_PY_PATH=/PATH/TO/CARLA_BENCHMARK_BRANCH_REPO/PythonClient - $ python env.py - -Check out the scenarios.py file for different training and test scenarios that can be used. diff --git a/python/ray/rllib/examples/carla/env.py b/python/ray/rllib/examples/carla/env.py deleted file mode 100644 index af5b619afcdb..000000000000 --- a/python/ray/rllib/examples/carla/env.py +++ /dev/null @@ -1,684 +0,0 @@ -"""OpenAI gym environment for Carla. 
Run this file for a demo.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from datetime import datetime -import atexit -import cv2 -import os -import json -import random -import signal -import subprocess -import sys -import time -import traceback - -import numpy as np -try: - import scipy.misc -except Exception: - pass - -import gym -from gym.spaces import Box, Discrete, Tuple - -from scenarios import DEFAULT_SCENARIO - -# Set this where you want to save image outputs (or empty string to disable) -CARLA_OUT_PATH = os.environ.get("CARLA_OUT", os.path.expanduser("~/carla_out")) -if CARLA_OUT_PATH and not os.path.exists(CARLA_OUT_PATH): - os.makedirs(CARLA_OUT_PATH) - -# Set this to the path of your Carla binary -SERVER_BINARY = os.environ.get("CARLA_SERVER", - os.path.expanduser("~/CARLA_0.7.0/CarlaUE4.sh")) - -assert os.path.exists(SERVER_BINARY) -if "CARLA_PY_PATH" in os.environ: - sys.path.append(os.path.expanduser(os.environ["CARLA_PY_PATH"])) -else: - # TODO(ekl) switch this to the binary path once the planner is in master - sys.path.append(os.path.expanduser("~/carla/PythonClient/")) - -try: - from carla.client import CarlaClient - from carla.sensor import Camera - from carla.settings import CarlaSettings - from carla.planner.planner import Planner, REACH_GOAL, GO_STRAIGHT, \ - TURN_RIGHT, TURN_LEFT, LANE_FOLLOW -except Exception as e: - print("Failed to import Carla python libs, try setting $CARLA_PY_PATH") - raise e - -# Carla planner commands -COMMANDS_ENUM = { - REACH_GOAL: "REACH_GOAL", - GO_STRAIGHT: "GO_STRAIGHT", - TURN_RIGHT: "TURN_RIGHT", - TURN_LEFT: "TURN_LEFT", - LANE_FOLLOW: "LANE_FOLLOW", -} - -# Mapping from string repr to one-hot encoding index to feed to the model -COMMAND_ORDINAL = { - "REACH_GOAL": 0, - "GO_STRAIGHT": 1, - "TURN_RIGHT": 2, - "TURN_LEFT": 3, - "LANE_FOLLOW": 4, -} - -# Number of retries if the server doesn't respond -RETRIES_ON_ERROR = 5 - -# Dummy Z coordinate to use when we only care about (x, y) -GROUND_Z = 22 - -# Default environment configuration -ENV_CONFIG = { - "log_images": True, - "enable_planner": True, - "framestack": 2, # note: only [1, 2] currently supported - "convert_images_to_video": True, - "early_terminate_on_collision": True, - "verbose": True, - "reward_function": "custom", - "render_x_res": 800, - "render_y_res": 600, - "x_res": 80, - "y_res": 80, - "server_map": "/Game/Maps/Town02", - "scenarios": [DEFAULT_SCENARIO], - "use_depth_camera": False, - "discrete_actions": True, - "squash_action_logits": False, -} - -DISCRETE_ACTIONS = { - # coast - 0: [0.0, 0.0], - # turn left - 1: [0.0, -0.5], - # turn right - 2: [0.0, 0.5], - # forward - 3: [1.0, 0.0], - # brake - 4: [-0.5, 0.0], - # forward left - 5: [1.0, -0.5], - # forward right - 6: [1.0, 0.5], - # brake left - 7: [-0.5, -0.5], - # brake right - 8: [-0.5, 0.5], -} - -live_carla_processes = set() - - -def cleanup(): - print("Killing live carla processes", live_carla_processes) - for pgid in live_carla_processes: - os.killpg(pgid, signal.SIGKILL) - - -atexit.register(cleanup) - - -class CarlaEnv(gym.Env): - def __init__(self, config=ENV_CONFIG): - self.config = config - self.city = self.config["server_map"].split("/")[-1] - if self.config["enable_planner"]: - self.planner = Planner(self.city) - - if config["discrete_actions"]: - self.action_space = Discrete(len(DISCRETE_ACTIONS)) - else: - self.action_space = Box(-1.0, 1.0, shape=(2, ), dtype=np.float32) - if config["use_depth_camera"]: - image_space = Box( 
- -1.0, - 1.0, - shape=(config["y_res"], config["x_res"], - 1 * config["framestack"]), - dtype=np.float32) - else: - image_space = Box( - 0, - 255, - shape=(config["y_res"], config["x_res"], - 3 * config["framestack"]), - dtype=np.uint8) - self.observation_space = Tuple( # forward_speed, dist to goal - [ - image_space, - Discrete(len(COMMANDS_ENUM)), # next_command - Box(-128.0, 128.0, shape=(2, ), dtype=np.float32) - ]) - - # TODO(ekl) this isn't really a proper gym spec - self._spec = lambda: None - self._spec.id = "Carla-v0" - - self.server_port = None - self.server_process = None - self.client = None - self.num_steps = 0 - self.total_reward = 0 - self.prev_measurement = None - self.prev_image = None - self.episode_id = None - self.measurements_file = None - self.weather = None - self.scenario = None - self.start_pos = None - self.end_pos = None - self.start_coord = None - self.end_coord = None - self.last_obs = None - - def init_server(self): - print("Initializing new Carla server...") - # Create a new server process and start the client. - self.server_port = random.randint(10000, 60000) - self.server_process = subprocess.Popen( - [ - SERVER_BINARY, self.config["server_map"], "-windowed", - "-ResX=400", "-ResY=300", "-carla-server", - "-carla-world-port={}".format(self.server_port) - ], - preexec_fn=os.setsid, - stdout=open(os.devnull, "w")) - live_carla_processes.add(os.getpgid(self.server_process.pid)) - - for i in range(RETRIES_ON_ERROR): - try: - self.client = CarlaClient("localhost", self.server_port) - return self.client.connect() - except Exception as e: - print("Error connecting: {}, attempt {}".format(e, i)) - time.sleep(2) - - def clear_server_state(self): - print("Clearing Carla server state") - try: - if self.client: - self.client.disconnect() - self.client = None - except Exception as e: - print("Error disconnecting client: {}".format(e)) - pass - if self.server_process: - pgid = os.getpgid(self.server_process.pid) - os.killpg(pgid, signal.SIGKILL) - live_carla_processes.remove(pgid) - self.server_port = None - self.server_process = None - - def __del__(self): - self.clear_server_state() - - def reset(self): - error = None - for _ in range(RETRIES_ON_ERROR): - try: - if not self.server_process: - self.init_server() - return self._reset() - except Exception as e: - print("Error during reset: {}".format(traceback.format_exc())) - self.clear_server_state() - error = e - raise error - - def _reset(self): - self.num_steps = 0 - self.total_reward = 0 - self.prev_measurement = None - self.prev_image = None - self.episode_id = datetime.today().strftime("%Y-%m-%d_%H-%M-%S_%f") - self.measurements_file = None - - # Create a CarlaSettings object. This object is a wrapper around - # the CarlaSettings.ini file. Here we set the configuration we - # want for the new episode. 
- settings = CarlaSettings() - self.scenario = random.choice(self.config["scenarios"]) - assert self.scenario["city"] == self.city, (self.scenario, self.city) - self.weather = random.choice(self.scenario["weather_distribution"]) - settings.set( - SynchronousMode=True, - SendNonPlayerAgentsInfo=True, - NumberOfVehicles=self.scenario["num_vehicles"], - NumberOfPedestrians=self.scenario["num_pedestrians"], - WeatherId=self.weather) - settings.randomize_seeds() - - if self.config["use_depth_camera"]: - camera1 = Camera("CameraDepth", PostProcessing="Depth") - camera1.set_image_size(self.config["render_x_res"], - self.config["render_y_res"]) - camera1.set_position(30, 0, 130) - settings.add_sensor(camera1) - - camera2 = Camera("CameraRGB") - camera2.set_image_size(self.config["render_x_res"], - self.config["render_y_res"]) - camera2.set_position(30, 0, 130) - settings.add_sensor(camera2) - - # Setup start and end positions - scene = self.client.load_settings(settings) - positions = scene.player_start_spots - self.start_pos = positions[self.scenario["start_pos_id"]] - self.end_pos = positions[self.scenario["end_pos_id"]] - self.start_coord = [ - self.start_pos.location.x // 100, self.start_pos.location.y // 100 - ] - self.end_coord = [ - self.end_pos.location.x // 100, self.end_pos.location.y // 100 - ] - print("Start pos {} ({}), end {} ({})".format( - self.scenario["start_pos_id"], self.start_coord, - self.scenario["end_pos_id"], self.end_coord)) - - # Notify the server that we want to start the episode at the - # player_start index. This function blocks until the server is ready - # to start the episode. - print("Starting new episode...") - self.client.start_episode(self.scenario["start_pos_id"]) - - image, py_measurements = self._read_observation() - self.prev_measurement = py_measurements - return self.encode_obs(self.preprocess_image(image), py_measurements) - - def encode_obs(self, image, py_measurements): - assert self.config["framestack"] in [1, 2] - prev_image = self.prev_image - self.prev_image = image - if prev_image is None: - prev_image = image - if self.config["framestack"] == 2: - image = np.concatenate([prev_image, image], axis=2) - obs = (image, COMMAND_ORDINAL[py_measurements["next_command"]], [ - py_measurements["forward_speed"], - py_measurements["distance_to_goal"] - ]) - self.last_obs = obs - return obs - - def step(self, action): - try: - obs = self._step(action) - return obs - except Exception: - print("Error during step, terminating episode early", - traceback.format_exc()) - self.clear_server_state() - return (self.last_obs, 0.0, True, {}) - - def _step(self, action): - if self.config["discrete_actions"]: - action = DISCRETE_ACTIONS[int(action)] - assert len(action) == 2, "Invalid action {}".format(action) - if self.config["squash_action_logits"]: - forward = 2 * float(sigmoid(action[0]) - 0.5) - throttle = float(np.clip(forward, 0, 1)) - brake = float(np.abs(np.clip(forward, -1, 0))) - steer = 2 * float(sigmoid(action[1]) - 0.5) - else: - throttle = float(np.clip(action[0], 0, 1)) - brake = float(np.abs(np.clip(action[0], -1, 0))) - steer = float(np.clip(action[1], -1, 1)) - reverse = False - hand_brake = False - - if self.config["verbose"]: - print("steer", steer, "throttle", throttle, "brake", brake, - "reverse", reverse) - - self.client.send_control( - steer=steer, - throttle=throttle, - brake=brake, - hand_brake=hand_brake, - reverse=reverse) - - # Process observations - image, py_measurements = self._read_observation() - if self.config["verbose"]: - print("Next 
command", py_measurements["next_command"]) - if type(action) is np.ndarray: - py_measurements["action"] = [float(a) for a in action] - else: - py_measurements["action"] = action - py_measurements["control"] = { - "steer": steer, - "throttle": throttle, - "brake": brake, - "reverse": reverse, - "hand_brake": hand_brake, - } - reward = compute_reward(self, self.prev_measurement, py_measurements) - self.total_reward += reward - py_measurements["reward"] = reward - py_measurements["total_reward"] = self.total_reward - done = (self.num_steps > self.scenario["max_steps"] - or py_measurements["next_command"] == "REACH_GOAL" - or (self.config["early_terminate_on_collision"] - and collided_done(py_measurements))) - py_measurements["done"] = done - self.prev_measurement = py_measurements - - # Write out measurements to file - if CARLA_OUT_PATH: - if not self.measurements_file: - self.measurements_file = open( - os.path.join( - CARLA_OUT_PATH, - "measurements_{}.json".format(self.episode_id)), "w") - self.measurements_file.write(json.dumps(py_measurements)) - self.measurements_file.write("\n") - if done: - self.measurements_file.close() - self.measurements_file = None - if self.config["convert_images_to_video"]: - self.images_to_video() - - self.num_steps += 1 - image = self.preprocess_image(image) - return (self.encode_obs(image, py_measurements), reward, done, - py_measurements) - - def images_to_video(self): - videos_dir = os.path.join(CARLA_OUT_PATH, "Videos") - if not os.path.exists(videos_dir): - os.makedirs(videos_dir) - ffmpeg_cmd = ( - "ffmpeg -loglevel -8 -r 60 -f image2 -s {x_res}x{y_res} " - "-start_number 0 -i " - "{img}_%04d.jpg -vcodec libx264 {vid}.mp4 && rm -f {img}_*.jpg " - ).format( - x_res=self.config["render_x_res"], - y_res=self.config["render_y_res"], - vid=os.path.join(videos_dir, self.episode_id), - img=os.path.join(CARLA_OUT_PATH, "CameraRGB", self.episode_id)) - print("Executing ffmpeg command", ffmpeg_cmd) - subprocess.call(ffmpeg_cmd, shell=True) - - def preprocess_image(self, image): - if self.config["use_depth_camera"]: - assert self.config["use_depth_camera"] - data = (image.data - 0.5) * 2 - data = data.reshape(self.config["render_y_res"], - self.config["render_x_res"], 1) - data = cv2.resize( - data, (self.config["x_res"], self.config["y_res"]), - interpolation=cv2.INTER_AREA) - data = np.expand_dims(data, 2) - else: - data = image.data.reshape(self.config["render_y_res"], - self.config["render_x_res"], 3) - data = cv2.resize( - data, (self.config["x_res"], self.config["y_res"]), - interpolation=cv2.INTER_AREA) - data = (data.astype(np.float32) - 128) / 128 - return data - - def _read_observation(self): - # Read the data produced by the server this frame. - measurements, sensor_data = self.client.read_data() - - # Print some of the measurements. 
- if self.config["verbose"]: - print_measurements(measurements) - - observation = None - if self.config["use_depth_camera"]: - camera_name = "CameraDepth" - else: - camera_name = "CameraRGB" - for name, image in sensor_data.items(): - if name == camera_name: - observation = image - - cur = measurements.player_measurements - - if self.config["enable_planner"]: - next_command = COMMANDS_ENUM[self.planner.get_next_command( - [cur.transform.location.x, cur.transform.location.y, GROUND_Z], - [ - cur.transform.orientation.x, cur.transform.orientation.y, - GROUND_Z - ], - [self.end_pos.location.x, self.end_pos.location.y, GROUND_Z], [ - self.end_pos.orientation.x, self.end_pos.orientation.y, - GROUND_Z - ])] - else: - next_command = "LANE_FOLLOW" - - if next_command == "REACH_GOAL": - distance_to_goal = 0.0 # avoids crash in planner - elif self.config["enable_planner"]: - distance_to_goal = self.planner.get_shortest_path_distance([ - cur.transform.location.x, cur.transform.location.y, GROUND_Z - ], [ - cur.transform.orientation.x, cur.transform.orientation.y, - GROUND_Z - ], [self.end_pos.location.x, self.end_pos.location.y, GROUND_Z], [ - self.end_pos.orientation.x, self.end_pos.orientation.y, - GROUND_Z - ]) / 100 - else: - distance_to_goal = -1 - - distance_to_goal_euclidean = float( - np.linalg.norm([ - cur.transform.location.x - self.end_pos.location.x, - cur.transform.location.y - self.end_pos.location.y - ]) / 100) - - py_measurements = { - "episode_id": self.episode_id, - "step": self.num_steps, - "x": cur.transform.location.x, - "y": cur.transform.location.y, - "x_orient": cur.transform.orientation.x, - "y_orient": cur.transform.orientation.y, - "forward_speed": cur.forward_speed, - "distance_to_goal": distance_to_goal, - "distance_to_goal_euclidean": distance_to_goal_euclidean, - "collision_vehicles": cur.collision_vehicles, - "collision_pedestrians": cur.collision_pedestrians, - "collision_other": cur.collision_other, - "intersection_offroad": cur.intersection_offroad, - "intersection_otherlane": cur.intersection_otherlane, - "weather": self.weather, - "map": self.config["server_map"], - "start_coord": self.start_coord, - "end_coord": self.end_coord, - "current_scenario": self.scenario, - "x_res": self.config["x_res"], - "y_res": self.config["y_res"], - "num_vehicles": self.scenario["num_vehicles"], - "num_pedestrians": self.scenario["num_pedestrians"], - "max_steps": self.scenario["max_steps"], - "next_command": next_command, - } - - if CARLA_OUT_PATH and self.config["log_images"]: - for name, image in sensor_data.items(): - out_dir = os.path.join(CARLA_OUT_PATH, name) - if not os.path.exists(out_dir): - os.makedirs(out_dir) - out_file = os.path.join( - out_dir, "{}_{:>04}.jpg".format(self.episode_id, - self.num_steps)) - scipy.misc.imsave(out_file, image.data) - - assert observation is not None, sensor_data - return observation, py_measurements - - -def compute_reward_corl2017(env, prev, current): - reward = 0.0 - - cur_dist = current["distance_to_goal"] - - prev_dist = prev["distance_to_goal"] - - if env.config["verbose"]: - print("Cur dist {}, prev dist {}".format(cur_dist, prev_dist)) - - # Distance travelled toward the goal in m - reward += np.clip(prev_dist - cur_dist, -10.0, 10.0) - - # Change in speed (km/h) - reward += 0.05 * (current["forward_speed"] - prev["forward_speed"]) - - # New collision damage - reward -= .00002 * ( - current["collision_vehicles"] + current["collision_pedestrians"] + - current["collision_other"] - prev["collision_vehicles"] - - 
prev["collision_pedestrians"] - prev["collision_other"]) - - # New sidewalk intersection - reward -= 2 * ( - current["intersection_offroad"] - prev["intersection_offroad"]) - - # New opposite lane intersection - reward -= 2 * ( - current["intersection_otherlane"] - prev["intersection_otherlane"]) - - return reward - - -def compute_reward_custom(env, prev, current): - reward = 0.0 - - cur_dist = current["distance_to_goal"] - prev_dist = prev["distance_to_goal"] - - if env.config["verbose"]: - print("Cur dist {}, prev dist {}".format(cur_dist, prev_dist)) - - # Distance travelled toward the goal in m - reward += np.clip(prev_dist - cur_dist, -10.0, 10.0) - - # Speed reward, up 30.0 (km/h) - reward += np.clip(current["forward_speed"], 0.0, 30.0) / 10 - - # New collision damage - new_damage = ( - current["collision_vehicles"] + current["collision_pedestrians"] + - current["collision_other"] - prev["collision_vehicles"] - - prev["collision_pedestrians"] - prev["collision_other"]) - if new_damage: - reward -= 100.0 - - # Sidewalk intersection - reward -= current["intersection_offroad"] - - # Opposite lane intersection - reward -= current["intersection_otherlane"] - - # Reached goal - if current["next_command"] == "REACH_GOAL": - reward += 100.0 - - return reward - - -def compute_reward_lane_keep(env, prev, current): - reward = 0.0 - - # Speed reward, up 30.0 (km/h) - reward += np.clip(current["forward_speed"], 0.0, 30.0) / 10 - - # New collision damage - new_damage = ( - current["collision_vehicles"] + current["collision_pedestrians"] + - current["collision_other"] - prev["collision_vehicles"] - - prev["collision_pedestrians"] - prev["collision_other"]) - if new_damage: - reward -= 100.0 - - # Sidewalk intersection - reward -= current["intersection_offroad"] - - # Opposite lane intersection - reward -= current["intersection_otherlane"] - - return reward - - -REWARD_FUNCTIONS = { - "corl2017": compute_reward_corl2017, - "custom": compute_reward_custom, - "lane_keep": compute_reward_lane_keep, -} - - -def compute_reward(env, prev, current): - return REWARD_FUNCTIONS[env.config["reward_function"]](env, prev, current) - - -def print_measurements(measurements): - number_of_agents = len(measurements.non_player_agents) - player_measurements = measurements.player_measurements - message = "Vehicle at ({pos_x:.1f}, {pos_y:.1f}), " - message += "{speed:.2f} km/h, " - message += "Collision: {{vehicles={col_cars:.0f}, " - message += "pedestrians={col_ped:.0f}, other={col_other:.0f}}}, " - message += "{other_lane:.0f}% other lane, {offroad:.0f}% off-road, " - message += "({agents_num:d} non-player agents in the scene)" - message = message.format( - pos_x=player_measurements.transform.location.x / 100, # cm -> m - pos_y=player_measurements.transform.location.y / 100, - speed=player_measurements.forward_speed, - col_cars=player_measurements.collision_vehicles, - col_ped=player_measurements.collision_pedestrians, - col_other=player_measurements.collision_other, - other_lane=100 * player_measurements.intersection_otherlane, - offroad=100 * player_measurements.intersection_offroad, - agents_num=number_of_agents) - print(message) - - -def sigmoid(x): - x = float(x) - return np.exp(x) / (1 + np.exp(x)) - - -def collided_done(py_measurements): - m = py_measurements - collided = (m["collision_vehicles"] > 0 or m["collision_pedestrians"] > 0 - or m["collision_other"] > 0) - return bool(collided or m["total_reward"] < -100) - - -if __name__ == "__main__": - for _ in range(2): - env = CarlaEnv() - obs = env.reset() - 
print("reset", obs) - start = time.time() - done = False - i = 0 - total_reward = 0.0 - while not done: - i += 1 - if ENV_CONFIG["discrete_actions"]: - obs, reward, done, info = env.step(1) - else: - obs, reward, done, info = env.step([0, 1, 0]) - total_reward += reward - print(i, "rew", reward, "total", total_reward, "done", done) - print("{} fps".format(100 / (time.time() - start))) diff --git a/python/ray/rllib/examples/carla/models.py b/python/ray/rllib/examples/carla/models.py deleted file mode 100644 index 3f8cc0c5ba47..000000000000 --- a/python/ray/rllib/examples/carla/models.py +++ /dev/null @@ -1,108 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf -import tensorflow.contrib.slim as slim -from tensorflow.contrib.layers import xavier_initializer - -from ray.rllib.models.catalog import ModelCatalog -from ray.rllib.models.misc import normc_initializer -from ray.rllib.models.model import Model - - -class CarlaModel(Model): - """Carla model that can process the observation tuple. - - The architecture processes the image using convolutional layers, the - metrics using fully connected layers, and then combines them with - further fully connected layers. - """ - - # TODO(ekl): use build_layers_v2 for native dict space support - def _build_layers(self, inputs, num_outputs, options): - # Parse options - image_shape = options["custom_options"]["image_shape"] - convs = options.get("conv_filters", [ - [16, [8, 8], 4], - [32, [5, 5], 3], - [32, [5, 5], 2], - [512, [10, 10], 1], - ]) - hiddens = options.get("fcnet_hiddens", [64]) - fcnet_activation = options.get("fcnet_activation", "tanh") - if fcnet_activation == "tanh": - activation = tf.nn.tanh - elif fcnet_activation == "relu": - activation = tf.nn.relu - - # Sanity checks - image_size = np.product(image_shape) - expected_shape = [image_size + 5 + 2] - assert inputs.shape.as_list()[1:] == expected_shape, \ - (inputs.shape.as_list()[1:], expected_shape) - - # Reshape the input vector back into its components - vision_in = tf.reshape(inputs[:, :image_size], - [tf.shape(inputs)[0]] + image_shape) - metrics_in = inputs[:, image_size:] - print("Vision in shape", vision_in) - print("Metrics in shape", metrics_in) - - # Setup vision layers - with tf.name_scope("carla_vision"): - for i, (out_size, kernel, stride) in enumerate(convs[:-1], 1): - vision_in = slim.conv2d( - vision_in, - out_size, - kernel, - stride, - scope="conv{}".format(i)) - out_size, kernel, stride = convs[-1] - vision_in = slim.conv2d( - vision_in, - out_size, - kernel, - stride, - padding="VALID", - scope="conv_out") - vision_in = tf.squeeze(vision_in, [1, 2]) - - # Setup metrics layer - with tf.name_scope("carla_metrics"): - metrics_in = slim.fully_connected( - metrics_in, - 64, - weights_initializer=xavier_initializer(), - activation_fn=activation, - scope="metrics_out") - - print("Shape of vision out is", vision_in.shape) - print("Shape of metric out is", metrics_in.shape) - - # Combine the metrics and vision inputs - with tf.name_scope("carla_out"): - i = 1 - last_layer = tf.concat([vision_in, metrics_in], axis=1) - print("Shape of concatenated out is", last_layer.shape) - for size in hiddens: - last_layer = slim.fully_connected( - last_layer, - size, - weights_initializer=xavier_initializer(), - activation_fn=activation, - scope="fc{}".format(i)) - i += 1 - output = slim.fully_connected( - last_layer, - num_outputs, - 
weights_initializer=normc_initializer(0.01), - activation_fn=None, - scope="fc_out") - - return output, last_layer - - -def register_carla_model(): - ModelCatalog.register_custom_model("carla", CarlaModel) diff --git a/python/ray/rllib/examples/carla/scenarios.py b/python/ray/rllib/examples/carla/scenarios.py deleted file mode 100644 index beedd2989d5c..000000000000 --- a/python/ray/rllib/examples/carla/scenarios.py +++ /dev/null @@ -1,131 +0,0 @@ -"""Collection of Carla scenarios, including those from the CoRL 2017 paper.""" - -TEST_WEATHERS = [0, 2, 5, 7, 9, 10, 11, 12, 13] -TRAIN_WEATHERS = [1, 3, 4, 6, 8, 14] - - -def build_scenario(city, start, end, vehicles, pedestrians, max_steps, - weathers): - return { - "city": city, - "num_vehicles": vehicles, - "num_pedestrians": pedestrians, - "weather_distribution": weathers, - "start_pos_id": start, - "end_pos_id": end, - "max_steps": max_steps, - } - - -# Simple scenario for Town02 that involves driving down a road -DEFAULT_SCENARIO = build_scenario( - city="Town02", - start=36, - end=40, - vehicles=20, - pedestrians=40, - max_steps=200, - weathers=[0]) - -# Simple scenario for Town02 that involves driving down a road -LANE_KEEP = build_scenario( - city="Town02", - start=36, - end=40, - vehicles=0, - pedestrians=0, - max_steps=2000, - weathers=[0]) - -# Scenarios from the CoRL2017 paper -POSES_TOWN1_STRAIGHT = [[36, 40], [39, 35], [110, 114], [7, 3], [0, 4], [ - 68, 50 -], [61, 59], [47, 64], [147, 90], [33, 87], [26, 19], [80, 76], [45, 49], [ - 55, 44 -], [29, 107], [95, 104], [84, 34], [53, 67], [22, 17], [91, 148], [20, 107], - [78, 70], [95, 102], [68, 44], [45, 69]] - -POSES_TOWN1_ONE_CURVE = [[138, 17], [47, 16], [26, 9], [42, 49], [140, 124], [ - 85, 98 -], [65, 133], [137, 51], [76, 66], [46, 39], [40, 60], [0, 29], [4, 129], [ - 121, 140 -], [2, 129], [78, 44], [68, 85], [41, 102], [95, 70], [68, 129], [84, 69], - [47, 79], [110, 15], [130, 17], [0, 17]] - -POSES_TOWN1_NAV = [[105, 29], [27, 130], [102, 87], [132, 27], [24, 44], [ - 96, 26 -], [34, 67], [28, 1], [140, 134], [105, 9], [148, 129], [65, 18], [21, 16], [ - 147, 97 -], [42, 51], [30, 41], [18, 107], [69, 45], [102, 95], [18, 145], [111, 64], - [79, 45], [84, 69], [73, 31], [37, 81]] - -POSES_TOWN2_STRAIGHT = [[38, 34], [4, 2], [12, 10], [62, 55], [43, 47], [ - 64, 66 -], [78, 76], [59, 57], [61, 18], [35, 39], [12, 8], [0, 18], [75, 68], [ - 54, 60 -], [45, 49], [46, 42], [53, 46], [80, 29], [65, 63], [0, 81], [54, 63], - [51, 42], [16, 19], [17, 26], [77, 68]] - -POSES_TOWN2_ONE_CURVE = [[37, 76], [8, 24], [60, 69], [38, 10], [21, 1], [ - 58, 71 -], [74, 32], [44, 0], [71, 16], [14, 24], [34, 11], [43, 14], [75, 16], [ - 80, 21 -], [3, 23], [75, 59], [50, 47], [11, 19], [77, 34], [79, 25], [40, 63], - [58, 76], [79, 55], [16, 61], [27, 11]] - -POSES_TOWN2_NAV = [[19, 66], [79, 14], [19, 57], [23, 1], [53, 76], [42, 13], [ - 31, 71 -], [33, 5], [54, 30], [10, 61], [66, 3], [27, 12], [79, 19], [2, 29], [16, 14], - [5, 57], [70, 73], [46, 67], [57, 50], [61, 49], [21, 12], - [51, 81], [77, 68], [56, 65], [43, 54]] - -TOWN1_STRAIGHT = [ - build_scenario("Town01", start, end, 0, 0, 300, TEST_WEATHERS) - for (start, end) in POSES_TOWN1_STRAIGHT -] - -TOWN1_ONE_CURVE = [ - build_scenario("Town01", start, end, 0, 0, 600, TEST_WEATHERS) - for (start, end) in POSES_TOWN1_ONE_CURVE -] - -TOWN1_NAVIGATION = [ - build_scenario("Town01", start, end, 0, 0, 900, TEST_WEATHERS) - for (start, end) in POSES_TOWN1_NAV -] - -TOWN1_NAVIGATION_DYNAMIC = [ - build_scenario("Town01", start, 
end, 20, 50, 900, TEST_WEATHERS) - for (start, end) in POSES_TOWN1_NAV -] - -TOWN2_STRAIGHT = [ - build_scenario("Town02", start, end, 0, 0, 300, TRAIN_WEATHERS) - for (start, end) in POSES_TOWN2_STRAIGHT -] - -TOWN2_STRAIGHT_DYNAMIC = [ - build_scenario("Town02", start, end, 20, 50, 300, TRAIN_WEATHERS) - for (start, end) in POSES_TOWN2_STRAIGHT -] - -TOWN2_ONE_CURVE = [ - build_scenario("Town02", start, end, 0, 0, 600, TRAIN_WEATHERS) - for (start, end) in POSES_TOWN2_ONE_CURVE -] - -TOWN2_NAVIGATION = [ - build_scenario("Town02", start, end, 0, 0, 900, TRAIN_WEATHERS) - for (start, end) in POSES_TOWN2_NAV -] - -TOWN2_NAVIGATION_DYNAMIC = [ - build_scenario("Town02", start, end, 20, 50, 900, TRAIN_WEATHERS) - for (start, end) in POSES_TOWN2_NAV -] - -TOWN1_ALL = (TOWN1_STRAIGHT + TOWN1_ONE_CURVE + TOWN1_NAVIGATION + - TOWN1_NAVIGATION_DYNAMIC) - -TOWN2_ALL = (TOWN2_STRAIGHT + TOWN2_ONE_CURVE + TOWN2_NAVIGATION + - TOWN2_NAVIGATION_DYNAMIC) diff --git a/python/ray/rllib/examples/carla/train_a3c.py b/python/ray/rllib/examples/carla/train_a3c.py deleted file mode 100644 index 8fbcfbc576d1..000000000000 --- a/python/ray/rllib/examples/carla/train_a3c.py +++ /dev/null @@ -1,51 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import ray -from ray.tune import grid_search, run_experiments - -from env import CarlaEnv, ENV_CONFIG -from models import register_carla_model -from scenarios import TOWN2_STRAIGHT - -env_config = ENV_CONFIG.copy() -env_config.update({ - "verbose": False, - "x_res": 80, - "y_res": 80, - "squash_action_logits": grid_search([False, True]), - "use_depth_camera": False, - "discrete_actions": False, - "server_map": "/Game/Maps/Town02", - "reward_function": grid_search(["custom", "corl2017"]), - "scenarios": TOWN2_STRAIGHT, -}) - -register_carla_model() -redis_address = ray.services.get_node_ip_address() + ":6379" - -ray.init(redis_address=redis_address) -run_experiments({ - "carla-a3c": { - "run": "A3C", - "env": CarlaEnv, - "config": { - "env_config": env_config, - "use_gpu_for_workers": True, - "model": { - "custom_model": "carla", - "custom_options": { - "image_shape": [80, 80, 6], - }, - "conv_filters": [ - [16, [8, 8], 4], - [32, [4, 4], 2], - [512, [10, 10], 1], - ], - }, - "gamma": 0.95, - "num_workers": 2, - }, - }, -}) diff --git a/python/ray/rllib/examples/carla/train_dqn.py b/python/ray/rllib/examples/carla/train_dqn.py deleted file mode 100644 index 27aa65444d38..000000000000 --- a/python/ray/rllib/examples/carla/train_dqn.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import ray -from ray.tune import run_experiments - -from env import CarlaEnv, ENV_CONFIG -from models import register_carla_model -from scenarios import TOWN2_ONE_CURVE - -env_config = ENV_CONFIG.copy() -env_config.update({ - "verbose": False, - "x_res": 80, - "y_res": 80, - "discrete_actions": True, - "server_map": "/Game/Maps/Town02", - "reward_function": "custom", - "scenarios": TOWN2_ONE_CURVE, -}) - -register_carla_model() - -ray.init() - - -def shape_out(spec): - return (spec.config.env_config.framestack * - (spec.config.env_config.use_depth_camera and 1 or 3)) - - -run_experiments({ - "carla-dqn": { - "run": "DQN", - "env": CarlaEnv, - "config": { - "env_config": env_config, - "model": { - "custom_model": "carla", - "custom_options": { - "image_shape": [ - 80, - 80, - shape_out, - ], - }, - "conv_filters": [ - [16, [8, 
8], 4], - [32, [4, 4], 2], - [512, [10, 10], 1], - ], - }, - "timesteps_per_iteration": 100, - "learning_starts": 1000, - "schedule_max_timesteps": 100000, - "gamma": 0.8, - "tf_session_args": { - "gpu_options": { - "allow_growth": True - }, - }, - }, - }, -}) diff --git a/python/ray/rllib/examples/carla/train_ppo.py b/python/ray/rllib/examples/carla/train_ppo.py deleted file mode 100644 index 130acf3a5849..000000000000 --- a/python/ray/rllib/examples/carla/train_ppo.py +++ /dev/null @@ -1,55 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import ray -from ray.tune import run_experiments - -from env import CarlaEnv, ENV_CONFIG -from models import register_carla_model -from scenarios import TOWN2_STRAIGHT - -env_config = ENV_CONFIG.copy() -env_config.update({ - "verbose": False, - "x_res": 80, - "y_res": 80, - "use_depth_camera": False, - "discrete_actions": False, - "server_map": "/Game/Maps/Town02", - "scenarios": TOWN2_STRAIGHT, -}) -register_carla_model() - -ray.init() -run_experiments({ - "carla": { - "run": "PPO", - "env": CarlaEnv, - "config": { - "env_config": env_config, - "model": { - "custom_model": "carla", - "custom_options": { - "image_shape": [ - env_config["x_res"], env_config["y_res"], 6 - ], - }, - "conv_filters": [ - [16, [8, 8], 4], - [32, [4, 4], 2], - [512, [10, 10], 1], - ], - }, - "num_workers": 1, - "train_batch_size": 2000, - "sample_batch_size": 100, - "lambda": 0.95, - "clip_param": 0.2, - "num_sgd_iter": 20, - "lr": 0.0001, - "sgd_minibatch_size": 32, - "num_gpus": 1, - }, - }, -}) diff --git a/python/ray/rllib/examples/custom_fast_model.py b/python/ray/rllib/examples/custom_fast_model.py index 86201c87da7a..dce01e9e7754 100644 --- a/python/ray/rllib/examples/custom_fast_model.py +++ b/python/ray/rllib/examples/custom_fast_model.py @@ -11,11 +11,13 @@ from gym.spaces import Discrete, Box import gym import numpy as np -import tensorflow as tf import ray from ray.rllib.models import Model, ModelCatalog from ray.tune import run_experiments, sample_from +from ray.rllib.utils import try_import_tf + +tf = try_import_tf() class FastModel(Model): diff --git a/python/ray/rllib/examples/custom_loss.py b/python/ray/rllib/examples/custom_loss.py index 1f04f0fb5a6e..8905b48952da 100644 --- a/python/ray/rllib/examples/custom_loss.py +++ b/python/ray/rllib/examples/custom_loss.py @@ -15,7 +15,6 @@ import argparse import os -import tensorflow as tf import ray from ray import tune @@ -23,6 +22,9 @@ ModelCatalog) from ray.rllib.models.model import restore_original_dimensions from ray.rllib.offline import JsonReader +from ray.rllib.utils import try_import_tf + +tf = try_import_tf() parser = argparse.ArgumentParser() parser.add_argument("--iters", type=int, default=200) diff --git a/python/ray/rllib/examples/export/cartpole_dqn_export.py b/python/ray/rllib/examples/export/cartpole_dqn_export.py index 6bfcae060d13..47a5e3b41ea7 100644 --- a/python/ray/rllib/examples/export/cartpole_dqn_export.py +++ b/python/ray/rllib/examples/export/cartpole_dqn_export.py @@ -6,9 +6,11 @@ import os import ray -import tensorflow as tf from ray.rllib.agents.registry import get_agent_class +from ray.rllib.utils import try_import_tf + +tf = try_import_tf() ray.init(num_cpus=10) diff --git a/python/ray/rllib/examples/multiagent_cartpole.py b/python/ray/rllib/examples/multiagent_cartpole.py index d7485e27a0c6..6e0f93711540 100644 --- a/python/ray/rllib/examples/multiagent_cartpole.py +++ 
b/python/ray/rllib/examples/multiagent_cartpole.py @@ -16,14 +16,14 @@ import gym import random -import tensorflow as tf -import tensorflow.contrib.slim as slim - import ray from ray import tune from ray.rllib.models import Model, ModelCatalog from ray.rllib.tests.test_multi_agent_env import MultiCartpole from ray.tune.registry import register_env +from ray.rllib.utils import try_import_tf + +tf = try_import_tf() parser = argparse.ArgumentParser() @@ -43,12 +43,12 @@ def _build_layers_v2(self, input_dict, num_outputs, options): tf.VariableScope(tf.AUTO_REUSE, "shared"), reuse=tf.AUTO_REUSE, auxiliary_name_scope=False): - last_layer = slim.fully_connected( - input_dict["obs"], 64, activation_fn=tf.nn.relu, scope="fc1") - last_layer = slim.fully_connected( - last_layer, 64, activation_fn=tf.nn.relu, scope="fc2") - output = slim.fully_connected( - last_layer, num_outputs, activation_fn=None, scope="fc_out") + last_layer = tf.layers.dense( + input_dict["obs"], 64, activation=tf.nn.relu, name="fc1") + last_layer = tf.layers.dense( + last_layer, 64, activation=tf.nn.relu, name="fc2") + output = tf.layers.dense( + last_layer, num_outputs, activation=None, name="fc_out") return output, last_layer @@ -59,12 +59,12 @@ def _build_layers_v2(self, input_dict, num_outputs, options): tf.VariableScope(tf.AUTO_REUSE, "shared"), reuse=tf.AUTO_REUSE, auxiliary_name_scope=False): - last_layer = slim.fully_connected( - input_dict["obs"], 64, activation_fn=tf.nn.relu, scope="fc1") - last_layer = slim.fully_connected( - last_layer, 64, activation_fn=tf.nn.relu, scope="fc2") - output = slim.fully_connected( - last_layer, num_outputs, activation_fn=None, scope="fc_out") + last_layer = tf.layers.dense( + input_dict["obs"], 64, activation=tf.nn.relu, name="fc1") + last_layer = tf.layers.dense( + last_layer, 64, activation=tf.nn.relu, name="fc2") + output = tf.layers.dense( + last_layer, num_outputs, activation=None, name="fc_out") return output, last_layer diff --git a/python/ray/rllib/examples/parametric_action_cartpole.py b/python/ray/rllib/examples/parametric_action_cartpole.py index 3d57c268cae3..e16e1ab75870 100644 --- a/python/ray/rllib/examples/parametric_action_cartpole.py +++ b/python/ray/rllib/examples/parametric_action_cartpole.py @@ -23,14 +23,15 @@ import numpy as np import gym from gym.spaces import Box, Discrete, Dict -import tensorflow as tf -import tensorflow.contrib.slim as slim import ray from ray import tune from ray.rllib.models import Model, ModelCatalog from ray.rllib.models.misc import normc_initializer from ray.tune.registry import register_env +from ray.rllib.utils import try_import_tf + +tf = try_import_tf() parser = argparse.ArgumentParser() parser.add_argument("--stop", type=int, default=200) @@ -134,18 +135,18 @@ def _build_layers_v2(self, input_dict, num_outputs, options): hiddens = [256, 256] for i, size in enumerate(hiddens): label = "fc{}".format(i) - last_layer = slim.fully_connected( + last_layer = tf.layers.dense( last_layer, size, - weights_initializer=normc_initializer(1.0), - activation_fn=tf.nn.tanh, - scope=label) - output = slim.fully_connected( + kernel_initializer=normc_initializer(1.0), + activation=tf.nn.tanh, + name=label) + output = tf.layers.dense( last_layer, action_embed_size, - weights_initializer=normc_initializer(0.01), - activation_fn=None, - scope="fc_out") + kernel_initializer=normc_initializer(0.01), + activation=None, + name="fc_out") # Expand the model output to [BATCH, 1, EMBED_SIZE]. 
Note that the # avail actions tensor is of shape [BATCH, MAX_ACTIONS, EMBED_SIZE]. diff --git a/python/ray/rllib/models/action_dist.py b/python/ray/rllib/models/action_dist.py index 1cad7d3aa9ac..9cf58b9dd317 100644 --- a/python/ray/rllib/models/action_dist.py +++ b/python/ray/rllib/models/action_dist.py @@ -12,7 +12,11 @@ tf = try_import_tf() if tf: - use_tf150_api = (distutils.version.LooseVersion(tf.VERSION) >= + if hasattr(tf, "__version__"): + version = tf.__version__ + else: + version = tf.VERSION + use_tf150_api = (distutils.version.LooseVersion(version) >= distutils.version.LooseVersion("1.5.0")) else: use_tf150_api = False diff --git a/python/ray/rllib/models/fcnet.py b/python/ray/rllib/models/fcnet.py index 3cc0fbe403c5..c3bacbd46a7d 100644 --- a/python/ray/rllib/models/fcnet.py +++ b/python/ray/rllib/models/fcnet.py @@ -21,8 +21,6 @@ def _build_layers(self, inputs, num_outputs, options): model that processes the components separately, use _build_layers_v2(). """ - import tensorflow.contrib.slim as slim - hiddens = options.get("fcnet_hiddens") activation = get_activation_fn(options.get("fcnet_activation")) @@ -31,18 +29,18 @@ def _build_layers(self, inputs, num_outputs, options): last_layer = inputs for size in hiddens: label = "fc{}".format(i) - last_layer = slim.fully_connected( + last_layer = tf.layers.dense( last_layer, size, - weights_initializer=normc_initializer(1.0), - activation_fn=activation, - scope=label) + kernel_initializer=normc_initializer(1.0), + activation=activation, + name=label) i += 1 label = "fc_out" - output = slim.fully_connected( + output = tf.layers.dense( last_layer, num_outputs, - weights_initializer=normc_initializer(0.01), - activation_fn=None, - scope=label) + kernel_initializer=normc_initializer(0.01), + activation=None, + name=label) return output, last_layer diff --git a/python/ray/rllib/models/lstm.py b/python/ray/rllib/models/lstm.py index 5b9328c3c463..62b854a86ed9 100644 --- a/python/ray/rllib/models/lstm.py +++ b/python/ray/rllib/models/lstm.py @@ -38,8 +38,6 @@ class LSTM(Model): @override(Model) def _build_layers_v2(self, input_dict, num_outputs, options): - import tensorflow.contrib.rnn as rnn - cell_size = options.get("lstm_cell_size") if options.get("lstm_use_prev_action_reward"): action_dim = int( @@ -76,7 +74,7 @@ def _build_layers_v2(self, input_dict, num_outputs, options): self.state_in = [c_in, h_in] # Setup LSTM outputs - state_in = rnn.LSTMStateTuple(c_in, h_in) + state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in) lstm_out, lstm_state = tf.nn.dynamic_rnn( lstm, last_layer, diff --git a/python/ray/rllib/models/visionnet.py b/python/ray/rllib/models/visionnet.py index 53eaf5d02c3f..6ad30ddb90c4 100644 --- a/python/ray/rllib/models/visionnet.py +++ b/python/ray/rllib/models/visionnet.py @@ -15,8 +15,6 @@ class VisionNetwork(Model): @override(Model) def _build_layers_v2(self, input_dict, num_outputs, options): - import tensorflow.contrib.slim as slim - inputs = input_dict["obs"] filters = options.get("conv_filters") if not filters: @@ -26,28 +24,29 @@ def _build_layers_v2(self, input_dict, num_outputs, options): with tf.name_scope("vision_net"): for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1): - inputs = slim.conv2d( + inputs = tf.layers.conv2d( inputs, out_size, kernel, stride, - activation_fn=activation, - scope="conv{}".format(i)) + activation=activation, + padding="same", + name="conv{}".format(i)) out_size, kernel, stride = filters[-1] - fc1 = slim.conv2d( + fc1 = tf.layers.conv2d( inputs, out_size, kernel, 
stride, - activation_fn=activation, - padding="VALID", - scope="fc1") - fc2 = slim.conv2d( + activation=activation, + padding="valid", + name="fc1") + fc2 = tf.layers.conv2d( fc1, num_outputs, [1, 1], - activation_fn=None, - normalizer_fn=None, - scope="fc2") + activation=None, + padding="same", + name="fc2") return flatten(fc2), flatten(fc1) diff --git a/python/ray/rllib/optimizers/aso_multi_gpu_learner.py b/python/ray/rllib/optimizers/aso_multi_gpu_learner.py index a584be7e6c53..328fee67d548 100644 --- a/python/ray/rllib/optimizers/aso_multi_gpu_learner.py +++ b/python/ray/rllib/optimizers/aso_multi_gpu_learner.py @@ -17,6 +17,9 @@ from ray.rllib.optimizers.multi_gpu_impl import LocalSyncParallelOptimizer from ray.rllib.utils.annotations import override from ray.rllib.utils.timer import TimerStat +from ray.rllib.utils import try_import_tf + +tf = try_import_tf() logger = logging.getLogger(__name__) @@ -38,9 +41,6 @@ def __init__(self, learner_queue_size=16, num_data_load_threads=16, _fake_gpus=False): - # Multi-GPU requires TensorFlow to function. - import tensorflow as tf - LearnerThread.__init__(self, local_evaluator, minibatch_buffer_size, num_sgd_iter, learner_queue_size) self.lr = lr diff --git a/python/ray/rllib/tests/test_catalog.py b/python/ray/rllib/tests/test_catalog.py index fe89152c6cbd..1c93b40ed484 100644 --- a/python/ray/rllib/tests/test_catalog.py +++ b/python/ray/rllib/tests/test_catalog.py @@ -1,6 +1,5 @@ import gym import numpy as np -import tensorflow as tf import unittest from gym.spaces import Box, Discrete, Tuple @@ -12,6 +11,9 @@ Preprocessor) from ray.rllib.models.fcnet import FullyConnectedNetwork from ray.rllib.models.visionnet import VisionNetwork +from ray.rllib.utils import try_import_tf + +tf = try_import_tf() class CustomPreprocessor(Preprocessor): diff --git a/python/ray/rllib/tests/test_lstm.py b/python/ray/rllib/tests/test_lstm.py index 385f2d7bc1ba..dd9c7ccd9d86 100644 --- a/python/ray/rllib/tests/test_lstm.py +++ b/python/ray/rllib/tests/test_lstm.py @@ -6,8 +6,6 @@ import numpy as np import pickle import unittest -import tensorflow as tf -import tensorflow.contrib.rnn as rnn import ray from ray.rllib.agents.ppo import PPOTrainer @@ -16,6 +14,9 @@ from ray.rllib.models.misc import linear, normc_initializer from ray.rllib.models.model import Model from ray.tune.registry import register_env +from ray.rllib.utils import try_import_tf + +tf = try_import_tf() class LSTMUtilsTest(unittest.TestCase): @@ -104,7 +105,7 @@ def spy(sequences, state_in, state_out, seq_lens): last_layer = add_time_dimension(features, self.seq_lens) # Setup the LSTM cell - lstm = rnn.BasicLSTMCell(cell_size, state_is_tuple=True) + lstm = tf.nn.rnn_cell.BasicLSTMCell(cell_size, state_is_tuple=True) self.state_init = [ np.zeros(lstm.state_size.c, np.float32), np.zeros(lstm.state_size.h, np.float32) @@ -121,7 +122,7 @@ def spy(sequences, state_in, state_out, seq_lens): self.state_in = [c_in, h_in] # Setup LSTM outputs - state_in = rnn.LSTMStateTuple(c_in, h_in) + state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in) lstm_out, lstm_state = tf.nn.dynamic_rnn( lstm, last_layer, diff --git a/python/ray/rllib/tests/test_nested_spaces.py b/python/ray/rllib/tests/test_nested_spaces.py index dc45ca3f605e..e4285e42287c 100644 --- a/python/ray/rllib/tests/test_nested_spaces.py +++ b/python/ray/rllib/tests/test_nested_spaces.py @@ -7,8 +7,6 @@ from gym import spaces from gym.envs.registration import EnvSpec import gym -import tensorflow.contrib.slim as slim -import tensorflow as tf import 
unittest import ray @@ -25,6 +23,9 @@ from ray.rllib.rollout import rollout from ray.rllib.tests.test_external_env import SimpleServing from ray.tune.registry import register_env +from ray.rllib.utils import try_import_tf + +tf = try_import_tf() DICT_SPACE = spaces.Dict({ "sensors": spaces.Dict({ @@ -179,8 +180,8 @@ def spy(pos, front_cam, task): stateful=True) with tf.control_dependencies([spy_fn]): - output = slim.fully_connected( - input_dict["obs"]["sensors"]["position"], num_outputs) + output = tf.layers.dense(input_dict["obs"]["sensors"]["position"], + num_outputs) return output, output @@ -208,7 +209,7 @@ def spy(pos, cam, task): stateful=True) with tf.control_dependencies([spy_fn]): - output = slim.fully_connected(input_dict["obs"][0], num_outputs) + output = tf.layers.dense(input_dict["obs"][0], num_outputs) return output, output diff --git a/python/ray/rllib/tests/test_optimizers.py b/python/ray/rllib/tests/test_optimizers.py index 65992a220ba2..9c9e6b56b426 100644 --- a/python/ray/rllib/tests/test_optimizers.py +++ b/python/ray/rllib/tests/test_optimizers.py @@ -4,7 +4,6 @@ import gym import numpy as np -import tensorflow as tf import time import unittest @@ -16,6 +15,9 @@ from ray.rllib.optimizers import AsyncGradientsOptimizer, AsyncSamplesOptimizer from ray.rllib.optimizers.aso_tree_aggregator import TreeAggregator from ray.rllib.tests.mock_evaluator import _MockEvaluator +from ray.rllib.utils import try_import_tf + +tf = try_import_tf() class AsyncOptimizerTest(unittest.TestCase): diff --git a/python/ray/rllib/utils/__init__.py b/python/ray/rllib/utils/__init__.py index 9ff0295690e2..a16cba22b611 100644 --- a/python/ray/rllib/utils/__init__.py +++ b/python/ray/rllib/utils/__init__.py @@ -33,10 +33,15 @@ def try_import_tf(): return None try: - import tensorflow as tf + import tensorflow.compat.v1 as tf + tf.disable_v2_behavior() return tf except ImportError: - return None + try: + import tensorflow as tf + return tf + except ImportError: + return None __all__ = [
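
For reference, a minimal usage sketch of the new helper (assuming only the behavior shown above: `try_import_tf` returns the v1-compatibility module under TF 2.x, the plain module under TF 1.x, and `None` when TensorFlow is absent):

    from ray.rllib.utils import try_import_tf

    tf = try_import_tf()  # None if TensorFlow is not installed

    if tf is not None:
        # Because v2 behavior is disabled, graph-mode constructs such as
        # placeholders and tf.layers keep working under TF 2.x.
        x = tf.placeholder(tf.float32, [None, 4])
        y = tf.layers.dense(x, 2, activation=tf.nn.relu)

This mirrors how the modules touched by this patch consume TensorFlow, so a missing TF installation no longer breaks importing RLlib itself.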