Skip to content

Commit

Permalink
Control number of episodes in ULTRA tests (rllib and train) (#739)
Browse files Browse the repository at this point in the history
* Make evaluations run in parallel to training

* Add rollout_fragment_length and max_episode_steps to rllib

* Add new flags to documentation

* Fix flag in example

* Ensure evaluation is not run in test_train.py

* Check if log_dir doesn't exist in test_train.py

* Fix typo in argument and set eval_episodes to 0

* Check if not exists for paths

* Fix formatting

* Update evaluate and train to evaluate per episode (#639)

* Update evaluate and train to eval per episode

* Fix tests and change normalization

* Fix formatting

* Record episode for train/eval at the same time

* Train performance averages last eval_rate episodes

* Add gap_mode and calculate generalization gap

* Enable explore during eval for better comparison

* Fix formatting

* Fix averaging in record_episode

* Only average scalars in record_episode

* Check for scalar when calculating gap

* Fix formatting

* Fix bugs introduced by merge

* Fix calculate_gap

* Fix averaging over episodes

* Fix formatting

* Check eval_episodes is 0 before evaluation

* Evaluate on train and test without exploration

* Make train/test occur in one ray.get

* Remove episode_count

* Remove episode_count from tests

* Remove averaging over training episodes

* Restructure evaluation_check with eval_mode = False

* Fix formatting

* try split ci test method

* Check for valid eval_episodes inside evaluation_check

* Removed averaging  feature of record_episode

* Fix formatting

Co-authored-by: Jenish Patel <[email protected]>

* Merge with ultra-rllib-speedup-tests

* Rename evaluation_task_id for training

* Simplify argument to ray.wait(...)

* Remove print statements

* Fix defaults

* Fix eval-rate in docs

* Initial changes (#744)

Co-authored-by: christianjans <[email protected]>
Co-authored-by: Jenish Patel <[email protected]>
Co-authored-by: christianjans <[email protected]>
  • Loading branch information
4 people authored and Gamenot committed May 12, 2021
1 parent 091c158 commit 4d4ab84
Show file tree
Hide file tree
Showing 13 changed files with 316 additions and 171 deletions.
60 changes: 54 additions & 6 deletions .github/workflows/ci-ultra-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ on:
- ultra/**

jobs:
test-base:
test-heavy-base-tests:
runs-on: ubuntu-18.04
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
container: huaweinoah/smarts:v0.4.13-minimal
Expand All @@ -38,12 +38,58 @@ jobs:
pip install --upgrade --upgrade-strategy eager wheel
pip install --upgrade -e .
pip install --upgrade numpy
- name: Run ultra tests
- name: Run ultra tests (heavy)
run: |
cd ultra
. .venv/bin/activate
scl scenario build-all ultra/scenarios/pool
pytest -v ./tests/
pytest -v \
./tests/ \
--ignore=./tests/test_ultra_package.py \
--ignore=./tests/test_adapter.py \
--ignore=./tests/test_env.py \
--ignore=./tests/test_episodes.py \
--ignore=./tests/test_scenarios.py \
--ignore=./tests/test_social_vehicles.py \
--ignore=./tests/test_rllib_train.py \
test-light-base-tests:
runs-on: ubuntu-18.04
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
container: huaweinoah/smarts:v0.4.13-minimal
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Setup X11
run: |
/usr/bin/Xorg \
-noreset \
+extension GLX \
+extension RANDR \
+extension RENDER \
-logfile ./xdummy.log \
-config /etc/X11/xorg.conf :1 &
- name: Install dependencies
run: |
cd ultra
python3.7 -m venv .venv
. .venv/bin/activate
pip install --upgrade --upgrade-strategy eager pip
pip install --upgrade --upgrade-strategy eager wheel
pip install --upgrade -e .
pip install --upgrade numpy
- name: Run ultra tests (light)
run: |
cd ultra
. .venv/bin/activate
scl scenario build-all ultra/scenarios/pool
pytest -v \
./tests/ \
--ignore=./tests/test_ultra_package.py \
--ignore=./tests/test_train.py \
--ignore=./tests/test_evaluate.py \
--ignore=./tests/test_analysis.py \
test-package-via-setup:
runs-on: ubuntu-18.04
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
Expand Down Expand Up @@ -74,6 +120,7 @@ jobs:
. .venv/bin/activate
scl scenario build-all ultra/scenarios/pool
pytest -v ./tests/test_ultra_package.py
test-package-via-wheel:
runs-on: ubuntu-18.04
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
Expand All @@ -95,9 +142,9 @@ jobs:
cd ultra
python3.7 -m venv .venv
. .venv/bin/activate
pip install --upgrade --upgrade-strategy eager pip
pip install --upgrade --upgrade-strategy eager wheel
pip install --upgrade --upgrade-strategy eager -e .
pip install --upgrade pip
pip install --upgrade -e .
pip install --upgrade numpy
python setup.py bdist_wheel
cd dist
pip install $(ls . | grep ultra)
Expand All @@ -109,6 +156,7 @@ jobs:
. .venv/bin/activate
scl scenario build-all ultra/scenarios/pool
pytest -v ./tests/test_ultra_package.py
# test-package-via-pypi:
# runs-on: ubuntu-18.04
# if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
Expand Down
6 changes: 3 additions & 3 deletions ultra/docs/getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@ Implementations of baseline agents are available in `ultra/baselines/`. Notice,
- `--task`: The task number to run (default is 1).
- `--level`: The level of the task (default is easy).
- `--episodes`: The number of training episodes to run (default is 1000000).
- `--max-episode-steps`: The option to limit the number of steps per episode (default is 200).
- `--timestep`: The environment timestep in seconds (default is 0.1).
- `--headless`: Provide this flag to run training without Envision.
- `--eval-episodes`: The number of evaluation episodes (default is 200).
- `--eval-rate`: The rate at which evaluation occurs based on the number of observations (default is 10000).
- `--eval-rate`: The number of training episodes to wait before running the evaluation (default is 200).
- `--seed`: The environment seed (default is 2).
- `--policy`: The policy (agent) to train (default is sac).
- `--log-dir`: The directory to put models, tensorboard data, and training results (default is logs/).
- `--max-steps-episode`: The option to limit the number of steps per episode (default is 10000).

Run the following command to train our DQN agent with a quick training session (if you started Envision in the previous section, refresh your browser to observe the training):
```sh
Expand All @@ -81,11 +81,11 @@ After training your agent, your models should be saved under `logs/<timestamped_
- `--policy`: A string tag on the evaluation experiment directory (default is TD3).
- `--models`: The path to the saved model (default is models/).
- `--episodes`: The number of evaluation episodes (default is 200).
- `--max-episode-steps`: The option to limit the number of steps per episode (default is 200).
- `--timestep`: The environment timestep in seconds (default is 0.1).
- `--headless`: Provide this flag to run evaluation without Envision.
- `--experiment-dir`: The path to the spec file that includes adapters and policy parameters.
- `--policy`: The policy (agent) to evaluate (default is sac).
- `--max-steps-episode`: The option to limit the number of steps per episode (default is 10000).

For example, let's re-run our DQN's evaluation with the following command:
```sh
Expand Down
10 changes: 7 additions & 3 deletions ultra/docs/rllib.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,22 @@ of the steps is shown below
- `--task`: The task number to run (default is 1).
- `--level`: The level of the task (default is easy).
- `--episodes`: The number of training episodes to run (default is 100).
- `--max-episode-steps`: The option to limit the number of steps per episode (default is 200).
- `--rollout-fragment-length`: The number of steps rolled out before training (default is 200).
- `--episodes`: The number of training episodes to run (default is 100).
- `--timestep`: The environment timestep in seconds (default is 0.1).
- `--headless`: Provide this flag to run training without Envision.
- `--eval-episodes`: The number of evaluation episodes (default is 200).
- `--eval-rate`: The rate at which evaluation occurs based on the number of episodes (default is 10000).
- `--eval-rate`: The number of training episodes to wait before running the evaluation (default is 200).
- `--seed`: The environment seed (default is 2).
- `--policy`: The policy (agent) to train (default is ppo). Only PPO is supported for now.
- `--log-dir`: The directory to put models, tensorboard data, and training results (default is logs/).
- `--training-batch-samples` : The number of training samples per iteration (default is 4000).
- `--train-batch-size` : The number of training samples per epoch (default is 4000).
- `--sgd-minibatch-size` : The training samples per gradient descent step (default is 128).

An example showing how to run RLlib training:
```sh
$ python ultra/rllib_train.py --task 1 --level easy --models logs/<timestamped_experiment_name>/models/ --episodes 5 --max-samples 200
$ python ultra/rllib_train.py --task 1 --level easy --models logs/<timestamped_experiment_name>/models/ --episodes 5 --train-batch-size 200
```
> This will produce another experiment directory under `logs/` containing the results of the training/testing
Expand Down
8 changes: 8 additions & 0 deletions ultra/tests/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,14 @@ def test_interface_analyze(self):
if os.path.exists(save_dir):
self.assertTrue(True)
shutil.rmtree(save_dir)
else:
self.assertTrue(False)

if os.path.exists(output):
self.assertTrue(True)
shutil.rmtree(output)
else:
self.assertTrue(False)

def test_analyze_scenario(self):
save_dir = os.path.join(AnalysisTest.OUTPUT_DIRECTORY, "scenarios/")
Expand Down Expand Up @@ -133,10 +137,14 @@ def test_analyze_scenario(self):
if os.path.exists(save_dir):
self.assertTrue(True)
shutil.rmtree(save_dir)
else:
self.assertTrue(False)

if os.path.exists(output):
self.assertTrue(True)
shutil.rmtree(output)
else:
self.assertTrue(False)

def test_save_histogram(self):
try:
Expand Down
4 changes: 1 addition & 3 deletions ultra/tests/test_episode.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ def test_episode_counter(self):
@ray.remote(max_calls=1, num_gpus=0, num_cpus=1)
def run_experiment():
agent, env = prepare_test_env_agent()
episode_count = 0
log_dir = os.path.join(EpisodeTest.OUTPUT_DIRECTORY, "logs/")
for episode in episodes(2, etag="Train", log_dir=log_dir):
observations = env.reset()
Expand Down Expand Up @@ -150,9 +149,8 @@ def run_experiment():
)
state = next_state
total_step += 1
episode_count += 1
env.close()
return episode_count
return episode.index

ray.init(ignore_reinit_error=True)
episode_count = ray.get(run_experiment.remote())
Expand Down
12 changes: 10 additions & 2 deletions ultra/tests/test_evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import pickle
import re
import shutil
import sys
import time
import unittest

import dill
Expand All @@ -34,7 +34,7 @@
from smarts.core.controllers import ActionSpaceType
from ultra.baselines.agent_spec import BaselineAgentSpec
from ultra.baselines.sac.sac.policy import SACPolicy
from ultra.evaluate import evaluate, evaluation_check
from ultra.evaluate import evaluate, evaluation_check, collect_evaluations
from ultra.utils.episode import episodes

seed = 2
Expand Down Expand Up @@ -378,6 +378,7 @@ def run_experiment(scenario_info, num_agents, log_dir, headless=True):

total_step = 0
etag = ":".join([policy_class.split(":")[-1] for policy_class in agent_classes])
evaluation_task_ids = dict()

for episode in episodes(1, etag=etag, log_dir=log_dir):
observations = env.reset()
Expand Down Expand Up @@ -410,10 +411,13 @@ def run_experiment(scenario_info, num_agents, log_dir, headless=True):
max_episode_steps=2,
policy_classes=agent_classes,
scenario_info=scenario_info,
evaluation_task_ids=evaluation_task_ids,
timestep_sec=0.1,
headless=True,
log_dir=log_dir,
)
collect_evaluations(evaluation_task_ids=evaluation_task_ids)

actions = {
agent_id: agents[agent_id].act(observation, explore=True)
for agent_id, observation in observations.items()
Expand Down Expand Up @@ -444,4 +448,8 @@ def run_experiment(scenario_info, num_agents, log_dir, headless=True):
total_step += 1
observations = next_observations

# Wait on the remaining evaluations to finish.
while collect_evaluations(evaluation_task_ids):
time.sleep(0.1)

env.close()
15 changes: 7 additions & 8 deletions ultra/tests/test_rllib_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,13 @@ def test_rllib_train_cli(self):
log_dir = os.path.join(RLlibTrainTest.OUTPUT_DIRECTORY, "tests/rllib_results/")
try:
os.system(
f"python ultra/rllib_train.py --task 00 --level easy --episodes 1 --training-batch-size 200 --headless --log-dir {log_dir}"
f"python ultra/rllib_train.py --task 00 --level easy --episodes 1 --max-episode-steps 2 --eval-episodes 1 --train-batch-size 1 --sgd-minibatch-size 1 --rollout-fragment-length 1 --headless --log-dir {log_dir}"
)
except Exception as err:
print(err)
self.assertTrue(False)

if os.path.exists(log_dir):
self.assertTrue(True)
else:
if not os.path.exists(log_dir):
self.assertTrue(False)

def test_rllib_train_method(self):
Expand All @@ -61,6 +59,8 @@ def test_rllib_train_method(self):
train(
task=("00", "easy"),
num_episodes=1,
max_episode_steps=2,
rollout_fragment_length=1,
policy="ppo",
eval_info={
"eval_rate": 2,
Expand All @@ -69,16 +69,15 @@ def test_rllib_train_method(self):
timestep_sec=0.1,
headless=True,
seed=2,
training_batch_size=200,
train_batch_size=1,
sgd_minibatch_size=1,
log_dir=log_dir,
)
except Exception as err:
print(err)
self.assertTrue(False)

if os.path.exists(log_dir):
self.assertTrue(True)
else:
if not os.path.exists(log_dir):
self.assertTrue(False)

@classmethod
Expand Down
Loading

0 comments on commit 4d4ab84

Please sign in to comment.