Update evaluate and train to evaluate per episode (#639)
* Update evaluate and train to eval per episode

* Fix tests and change normalization

* Fix formatting

* Record episode for train/eval at the same time

* Train performance averages last eval_rate episodes

* Add gap_mode and calculate generalization gap

* Enable explore during eval for better comparison

* Fix formatting

* Fix averaging in record_episode

* Only average scalars in record_episode

* Check for scalar when calculating gap

* Fix formatting

* Fix bugs introduced by merge

* Fix calculate_gap

* Fix averaging over episodes

* Fix formatting

* Check eval_episodes is 0 before evaluation

* Evaluate on train and test without exploration

* Make train/test occur in one ray.get

* Remove episode_count

* Remove episode_count from tests

* Remove averaging over training episodes

* Restructure evaluation_check with eval_mode = False

* Fix formatting

* Try splitting the CI test method

* Check for valid eval_episodes inside evaluation_check

* Remove averaging feature of record_episode

* Fix formatting

Co-authored-by: Jenish Patel <[email protected]>
AlexLewandowski and JenishPatel99 authored Apr 6, 2021
1 parent 96d806f commit a55c732
Showing 5 changed files with 146 additions and 41 deletions.
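
The headline change is that evaluation is now scheduled per episode rather than per observation count. As a rough, hypothetical sketch of the gate that `evaluation_check` now applies (the helper name below is illustrative, not part of the codebase):

```python
# Minimal sketch of the new per-episode evaluation gate.
# eval_rate now counts episodes (default 100) instead of observations (10000),
# and an agent is evaluated at most once per qualifying episode index.

def agents_due_for_evaluation(episode_index, eval_rate, last_eval_iterations):
    """Return the agent ids whose evaluation should run on this episode."""
    return [
        agent_id
        for agent_id, last_eval in last_eval_iterations.items()
        if episode_index % eval_rate == 0 and last_eval != episode_index
    ]

# With eval_rate=100, evaluations trigger on episodes 0, 100, 200, ...
last_evals = {"agent_0": -1, "agent_1": -1}
print(agents_due_for_evaluation(200, 100, last_evals))  # ['agent_0', 'agent_1']
print(agents_due_for_evaluation(201, 100, last_evals))  # []
```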
60 changes: 54 additions & 6 deletions .github/workflows/ci-ultra-tests.yml
@@ -3,7 +3,7 @@ name: ULTRA CI Base Tests
on: [push, pull_request]

jobs:
test-base:
test-heavy-base-tests:
runs-on: ubuntu-18.04
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
container: huaweinoah/smarts:v0.4.13-minimal
@@ -28,12 +28,58 @@ jobs:
pip install --upgrade --upgrade-strategy eager wheel
pip install --upgrade -e .
pip install --upgrade numpy
- name: Run ultra tests
- name: Run ultra tests (heavy)
run: |
cd ultra
. .venv/bin/activate
scl scenario build-all ultra/scenarios/pool
pytest -v ./tests/
pytest -v \
./tests/ \
--ignore=./tests/test_ultra_package.py \
--ignore=./tests/test_adapter.py \
--ignore=./tests/test_env.py \
--ignore=./tests/test_episodes.py \
--ignore=./tests/test_scenarios.py \
--ignore=./tests/test_social_vehicles.py \
--ignore=./tests/test_rllib_train.py \
test-light-base-tests:
runs-on: ubuntu-18.04
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
container: huaweinoah/smarts:v0.4.13-minimal
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Setup X11
run: |
/usr/bin/Xorg \
-noreset \
+extension GLX \
+extension RANDR \
+extension RENDER \
-logfile ./xdummy.log \
-config /etc/X11/xorg.conf :1 &
- name: Install dependencies
run: |
cd ultra
python3.7 -m venv .venv
. .venv/bin/activate
pip install --upgrade --upgrade-strategy eager pip
pip install --upgrade --upgrade-strategy eager wheel
pip install --upgrade -e .
pip install --upgrade numpy
- name: Run ultra tests (light)
run: |
cd ultra
. .venv/bin/activate
scl scenario build-all ultra/scenarios/pool
pytest -v \
./tests/ \
--ignore=./tests/test_ultra_package.py \
--ignore=./tests/test_train.py \
--ignore=./tests/test_evaluate.py \
--ignore=./tests/test_analysis.py \
test-package-via-setup:
runs-on: ubuntu-18.04
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
@@ -64,6 +110,7 @@ jobs:
. .venv/bin/activate
scl scenario build-all ultra/scenarios/pool
pytest -v ./tests/test_ultra_package.py
test-package-via-wheel:
runs-on: ubuntu-18.04
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
@@ -85,9 +132,9 @@
cd ultra
python3.7 -m venv .venv
. .venv/bin/activate
pip install --upgrade --upgrade-strategy eager pip
pip install --upgrade --upgrade-strategy eager wheel
pip install --upgrade --upgrade-strategy eager -e .
pip install --upgrade pip
pip install --upgrade -e .
pip install --upgrade numpy
python setup.py bdist_wheel
cd dist
pip install $(ls . | grep ultra)
@@ -99,6 +146,7 @@
. .venv/bin/activate
scl scenario build-all ultra/scenarios/pool
pytest -v ./tests/test_ultra_package.py
# test-package-via-pypi:
# runs-on: ubuntu-18.04
# if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
4 changes: 1 addition & 3 deletions ultra/tests/test_episode.py
@@ -120,7 +120,6 @@ def test_episode_counter(self):
@ray.remote(max_calls=1, num_gpus=0, num_cpus=1)
def run_experiment():
agent, env = prepare_test_env_agent()
episode_count = 0
log_dir = os.path.join(EpisodeTest.OUTPUT_DIRECTORY, "logs/")
for episode in episodes(2, etag="Train", log_dir=log_dir):
observations = env.reset()
@@ -150,9 +149,8 @@ def run_experiment():
)
state = next_state
total_step += 1
episode_count += 1
env.close()
return episode_count
return episode.index

ray.init(ignore_reinit_error=True)
episode_count = ray.get(run_experiment.remote())
53 changes: 49 additions & 4 deletions ultra/ultra/evaluate.py
@@ -58,8 +58,8 @@ def evaluation_check(
agent_ids_to_evaluate = [
agent_id
for agent_id in agent_ids
if (episode.get_itr(agent_id) + 1) % eval_rate == 0
and episode.last_eval_iterations[agent_id] != episode.get_itr(agent_id)
if episode.index % eval_rate == 0
and episode.last_eval_iterations[agent_id] != episode.index
]

# Skip evaluation if there are no agents needing an evaluation.
@@ -95,16 +95,60 @@ def evaluation_check(
headless=headless,
timestep_sec=timestep_sec,
log_dir=log_dir,
eval_mode=True,
)
]
)[0]
)
episode.eval_count += 1
episode.last_eval_iterations[agent_id] = episode.get_itr(agent_id)

# Put the evaluation data for all agents into the episode and record the TensorBoard.
episode.eval_mode()
episode.info[episode.active_tag] = evaluation_data
episode.record_tensorboard()

episode.eval_train_mode()
evaluation_train_data = {}

for agent_id in agent_ids_to_evaluate:
# Get the checkpoint directory for the current agent and save its model.
checkpoint_directory = episode.checkpoint_dir(
agent_id, episode.get_itr(agent_id)
)
agents[agent_id].save(checkpoint_directory)

# Perform the evaluation on this agent and save the data.
evaluation_train_data.update(
ray.get(
[
evaluate.remote(
seed=episode.eval_count,
experiment_dir=episode.experiment_dir,
agent_ids=[agent_id],
policy_classes={agent_id: policy_classes[agent_id]},
checkpoint_dirs={agent_id: checkpoint_directory},
scenario_info=scenario_info,
num_episodes=eval_episodes,
max_episode_steps=max_episode_steps,
headless=headless,
timestep_sec=timestep_sec,
log_dir=log_dir,
eval_mode=False,
)
]
)[0]
)
episode.eval_count += 1
#
# Put the evaluation data for all agents into the episode and record the TensorBoard.

episode.info[episode.active_tag] = evaluation_train_data
episode.record_tensorboard()

episode.gap_mode()
episode.calculate_gap()
episode.record_tensorboard()

episode.train_mode()


@@ -122,6 +166,7 @@ def evaluate(
headless,
timestep_sec,
log_dir,
eval_mode=True,
):
torch.set_num_threads(1)

@@ -145,7 +190,7 @@ def evaluate(
headless=headless,
timestep_sec=timestep_sec,
seed=seed,
eval_mode=True,
eval_mode=eval_mode,
)

# Build each agent from its specification.
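
The additions to `evaluation_check` above boil down to two evaluation passes per checkpoint followed by a gap computation. The sketch below compresses that flow; the real code saves each agent's checkpoint and dispatches `evaluate.remote(...)` through Ray, whereas `run_evaluation(agent_id, eval_mode)` here is an assumed stand-in, and the `eval_count` / `last_eval_iterations` bookkeeping is omitted:

```python
def evaluation_passes(episode, agent_ids_to_evaluate, run_evaluation):
    # Pass 1: eval_mode=True (held-out evaluation scenarios), logged under "Evaluation".
    episode.eval_mode()
    episode.info[episode.active_tag] = {
        agent_id: run_evaluation(agent_id, eval_mode=True)
        for agent_id in agent_ids_to_evaluate
    }
    episode.record_tensorboard()

    # Pass 2: eval_mode=False (training scenarios), logged under "Evaluation_Training".
    episode.eval_train_mode()
    episode.info[episode.active_tag] = {
        agent_id: run_evaluation(agent_id, eval_mode=False)
        for agent_id in agent_ids_to_evaluate
    }
    episode.record_tensorboard()

    # Generalization gap: Evaluation_Training minus Evaluation, scalar metrics only.
    episode.gap_mode()
    episode.calculate_gap()
    episode.record_tensorboard()

    # Restore the "Train" tag for subsequent logging.
    episode.train_mode()
```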
33 changes: 16 additions & 17 deletions ultra/ultra/train.py
@@ -107,7 +107,9 @@ def train(
# policy_classes list, transform it to an etag of "dqn-v0:ppo-v0".
etag = ":".join([policy_class.split(":")[-1] for policy_class in policy_classes])

old_episode = None
for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):

# Reset the environment and retrieve the initial observations.
observations = env.reset()
dones = {"__all__": False}
@@ -130,24 +132,22 @@
pickle.HIGHEST_PROTOCOL,
)

evaluation_check(
agents=agents,
agent_ids=agent_ids,
policy_classes=agent_classes,
episode=episode,
log_dir=log_dir,
max_episode_steps=max_episode_steps,
**eval_info,
**env.info,
)

while not dones["__all__"]:
# Break if any of the agent's step counts is 1000000 or greater.
if any([episode.get_itr(agent_id) >= 1000000 for agent_id in agents]):
finished = True
break

# Perform the evaluation check.
evaluation_check(
agents=agents,
agent_ids=agent_ids,
policy_classes=agent_classes,
episode=episode,
log_dir=log_dir,
max_episode_steps=max_episode_steps,
**eval_info,
**env.info,
)

# Request and perform actions on each agent that received an observation.
actions = {
agent_id: agents[agent_id].act(observation, explore=True)
@@ -183,7 +183,6 @@ def train(
total_step += 1
observations = next_observations

# Normalize the data and record this episode on tensorboard.
episode.record_episode()
episode.record_tensorboard()

@@ -230,9 +229,9 @@ def train(
)
parser.add_argument(
"--eval-rate",
help="Evaluation rate based on number of observations",
help="Evaluation rate based on number of episodes",
type=int,
default=10000,
default=100,
)
parser.add_argument(
"--seed",
@@ -282,7 +281,7 @@ def train(
num_episodes=int(args.episodes),
max_episode_steps=int(args.max_episode_steps),
eval_info={
"eval_rate": float(args.eval_rate),
"eval_rate": int(args.eval_rate),
"eval_episodes": int(args.eval_episodes),
},
timestep_sec=float(args.timestep),
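
Two smaller points from the `train.py` diff: `--eval-rate` is now an episode count parsed as an int (default 100, replacing the observation-based default of 10000), and, per the commit message, an `eval_episodes` value of 0 disables evaluation. A minimal illustration of how those settings might be checked (the guard is an illustrative reading of "Check for valid eval_episodes inside evaluation_check", not the exact code):

```python
# Evaluation settings as train.py now passes them (eval_rate is an episode count).
eval_info = {"eval_rate": 100, "eval_episodes": 200}

def evaluation_enabled(eval_episodes):
    # eval_episodes == 0 means "skip evaluation entirely".
    return eval_episodes > 0

print(evaluation_enabled(eval_info["eval_episodes"]))  # True
print(evaluation_enabled(0))                           # False
```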
37 changes: 26 additions & 11 deletions ultra/ultra/utils/episode.py
@@ -44,8 +44,8 @@
class LogInfo:
def __init__(self):
self.data = {
"env_score": 0,
"episode_reward": 0,
"env_score": 0.0,
"episode_return": 0.0,
"dist_center": 0,
"goal_dist": 0,
"speed": 0,
@@ -66,7 +66,7 @@ def __init__(self):
}

def add(self, infos, rewards):
self.data["env_score"] += int(infos["logs"]["env_score"])
self.data["env_score"] += infos["logs"]["env_score"]
self.data["speed"] += infos["logs"]["speed"]
self.data["max_speed_violation"] += (
1 if infos["logs"]["speed"] > infos["logs"]["closest_wp"].speed_limit else 0
@@ -79,8 +79,8 @@ def add(self, infos, rewards):
self.data["goal_dist"] = infos["logs"]["goal_dist"]
self.data["ego_linear_jerk"] += infos["logs"]["linear_jerk"]
self.data["ego_angular_jerk"] += infos["logs"]["angular_jerk"]
self.data["episode_reward"] += rewards
self.data["final_pos"] = infos["logs"]["position"]
self.data["episode_return"] += rewards
self.data["final_pos"] = infos["logs"]["position"][:2]
self.data["start_pos"] = infos["logs"]["start"].position
self.data["dist_travelled"] = math.sqrt(
(self.data["final_pos"][1] - self.data["start_pos"][1]) ** 2
@@ -104,14 +104,13 @@ def step(self):

def normalize(self):
steps = self.data["episode_length"]
self.data["env_score"] /= steps
self.data["dist_center"] /= steps
self.data["speed"] /= steps
self.data["ego_linear_jerk"] /= steps
self.data["ego_angular_jerk"] /= steps
self.data["ego_num_violations"] /= steps
self.data["social_num_violations"] /= steps
self.data["max_speed_violation"] /= steps
# self.data["ego_num_violations"] /= steps
# self.data["social_num_violations"] /= steps
# self.data["max_speed_violation"] /= steps


class Episode:
@@ -183,9 +182,25 @@ def checkpoint_dir(self, agent_id, iteration):
def train_mode(self):
self.active_tag = "Train"

def eval_train_mode(self):
self.active_tag = "Evaluation_Training"

def eval_mode(self):
self.active_tag = "Evaluation"

def gap_mode(self):
self.active_tag = "Gap"

def calculate_gap(self):
gap_info = self.info["Gap"]
for agent_id, agent_info in self.info["Evaluation"].items():
for key in agent_info.data:
if np.isscalar(gap_info[agent_id].data[key]):
gap_info[agent_id].data[key] = (
self.info["Evaluation_Training"][agent_id].data[key]
- self.info["Evaluation"][agent_id].data[key]
)

def reset(self, mode="Train"):
self.start_time = time.time()
self.timestep_sec = 0.1
@@ -328,12 +343,12 @@ def episodes(n, etag=None, log_dir=None):
agent_rewards_strings = [
"{}: {:.4f}".format(
agent_id,
agent_info.data["episode_reward"],
agent_info.data["episode_return"],
)
for agent_id, agent_info in e.info[e.active_tag].items()
]
row = (
f"{e.index}/{n}",
f"{e.index + 1}/{n}",
f"{e.sim2wall_ratio:.2f}",
f"{e.steps}",
f"{e.steps_per_second:.2f}",
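
To make `calculate_gap` concrete, here is a small worked example. As the `np.isscalar` check implies, each `LogInfo.data` entry is assumed to be either a scalar metric or an array-like value (such as a position), and only scalar entries contribute to the gap; the numbers are invented for illustration:

```python
import numpy as np

# Hypothetical per-agent metrics after the two evaluation passes.
evaluation_training = {  # eval_mode=False: training scenarios
    "episode_return": 1.50,
    "env_score": 0.80,
    "final_pos": np.array([3.0, 4.0]),
}
evaluation = {  # eval_mode=True: held-out evaluation scenarios
    "episode_return": 1.10,
    "env_score": 0.65,
    "final_pos": np.array([5.0, 1.0]),
}

# Gap = training-scenario value minus evaluation-scenario value, scalars only.
gap = {
    key: evaluation_training[key] - evaluation[key]
    for key in evaluation
    if np.isscalar(evaluation[key])
}
print(gap)  # roughly {'episode_return': 0.40, 'env_score': 0.15}
```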
