Control number of episodes in ULTRA tests (rllib and train) (#739)

* Make evaluations run in parallel to training
* Add rollout_fragment_length and max_episode_steps to rllib
* Add new flags to documentation
* Fix flag in example
* Ensure evaluation is not run in test_train.py
* Check if log_dir doesn't exist in test_train.py
* Fix typo in argument and set eval_episodes to 0
* Check if not exists for paths
* Fix formatting
* Update evaluate and train to evaluate per episode (#639)
* Update evaluate and train to eval per episode
* Fix tests and change normalization
* Fix formatting
* Record episode for train/eval at the same time
* Train performance averages last eval_rate episodes
* Add gap_mode and calculate generalization gap
* Enable explore during eval for better comparison
* Fix formatting
* Fix averaging in record_episode
* Only average scalars in record_episode
* Check for scalar when calculating gap
* Fix formatting
* Fix bugs introduced by merge
* Fix calculate_gap
* Fix averaging over episodes
* Fix formatting
* Check eval_episodes is 0 before evaluation
* Evaluate on train and test without exploration
* Make train/test occur in one ray.get
* Remove episode_count
* Remove episode_count from tests
* Remove averaging over training episodes
* Restructure evaluation_check with eval_mode = False
* Fix formatting
* try split ci test method
* Check for valid eval_episodes inside evaluation_check
* Removed averaging feature of record_episode
* Fix formatting
Co-authored-by: Jenish Patel <[email protected]>
* Merge with ultra-rllib-speedup-tests
* Rename evaluation_task_id for training
* Simplify argument to ray.wait(...)
* Remove print statements
* Fix defaults
* Fix eval-rate in docs
* Initial changes (#744)
Co-authored-by: christianjans <[email protected]>
Co-authored-by: Jenish Patel <[email protected]>
Co-authored-by: christianjans <[email protected]>
huawei-noah · May 12, 2021 · 4d4ab84 · 4d4ab84
1 parent 091c158
commit 4d4ab84
Show file tree

Hide file tree

Showing 13 changed files with 316 additions and 171 deletions.
diff --git a/.github/workflows/ci-ultra-tests.yml b/.github/workflows/ci-ultra-tests.yml
@@ -13,7 +13,7 @@ on:
       - ultra/**
 
 jobs:
-  test-base:
+  test-heavy-base-tests:
     runs-on: ubuntu-18.04
     if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
     container: huaweinoah/smarts:v0.4.13-minimal
@@ -38,12 +38,58 @@ jobs:
           pip install --upgrade --upgrade-strategy eager wheel
           pip install --upgrade -e .
           pip install --upgrade numpy
-      - name: Run ultra tests
+      - name: Run ultra tests (heavy)
         run: |
           cd ultra
           . .venv/bin/activate
           scl scenario build-all ultra/scenarios/pool
-          pytest -v ./tests/
+          pytest -v \
+          ./tests/ \
+          --ignore=./tests/test_ultra_package.py \
+          --ignore=./tests/test_adapter.py \
+          --ignore=./tests/test_env.py \
+          --ignore=./tests/test_episodes.py \
+          --ignore=./tests/test_scenarios.py \
+          --ignore=./tests/test_social_vehicles.py \
+          --ignore=./tests/test_rllib_train.py \
+
+  test-light-base-tests:
+    runs-on: ubuntu-18.04
+    if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
+    container: huaweinoah/smarts:v0.4.13-minimal
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Setup X11
+        run: |
+          /usr/bin/Xorg \
+            -noreset \
+            +extension GLX \
+            +extension RANDR \
+            +extension RENDER \
+            -logfile ./xdummy.log \
+            -config /etc/X11/xorg.conf :1 &
+      - name: Install dependencies
+        run: |
+          cd ultra
+          python3.7 -m venv .venv
+          . .venv/bin/activate
+          pip install --upgrade --upgrade-strategy eager pip
+          pip install --upgrade --upgrade-strategy eager wheel
+          pip install --upgrade -e .
+          pip install --upgrade numpy
+      - name: Run ultra tests (light)
+        run: |
+          cd ultra
+          . .venv/bin/activate
+          scl scenario build-all ultra/scenarios/pool
+          pytest -v \
+          ./tests/ \
+          --ignore=./tests/test_ultra_package.py \
+          --ignore=./tests/test_train.py \
+          --ignore=./tests/test_evaluate.py \
+          --ignore=./tests/test_analysis.py \
+
   test-package-via-setup:
     runs-on: ubuntu-18.04
     if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
@@ -74,6 +120,7 @@ jobs:
           . .venv/bin/activate
           scl scenario build-all ultra/scenarios/pool
           pytest -v ./tests/test_ultra_package.py
+
   test-package-via-wheel:
     runs-on: ubuntu-18.04
     if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
@@ -95,9 +142,9 @@ jobs:
           cd ultra
           python3.7 -m venv .venv
           . .venv/bin/activate
-          pip install --upgrade --upgrade-strategy eager pip
-          pip install --upgrade --upgrade-strategy eager wheel
-          pip install --upgrade --upgrade-strategy eager -e .
+          pip install --upgrade pip
+          pip install --upgrade -e .
+          pip install --upgrade numpy
           python setup.py bdist_wheel
           cd dist
           pip install $(ls . | grep ultra)
@@ -109,6 +156,7 @@ jobs:
           . .venv/bin/activate
           scl scenario build-all ultra/scenarios/pool
           pytest -v ./tests/test_ultra_package.py
+
   # test-package-via-pypi:
   #   runs-on: ubuntu-18.04
   #   if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository

diff --git a/ultra/docs/getting_started.md b/ultra/docs/getting_started.md
@@ -51,14 +51,14 @@ Implementations of baseline agents are available in `ultra/baselines/`. Notice,
   - `--task`: The task number to run (default is 1).
   - `--level`: The level of the task (default is easy).
   - `--episodes`: The number of training episodes to run (default is 1000000).
+  - `--max-episode-steps`: The option to limit the number of steps per episode (default is 200).
   - `--timestep`: The environment timestep in seconds (default is 0.1).
   - `--headless`: Provide this flag to run training without Envision.
   - `--eval-episodes`: The number of evaluation episodes (default is 200).
-  - `--eval-rate`: The rate at which evaluation occurs based on the number of observations (default is 10000).
+  - `--eval-rate`: The number of training episodes to wait before running the evaluation (default is 200).
   - `--seed`: The environment seed (default is 2).
   - `--policy`: The policy (agent) to train (default is sac).
   - `--log-dir`: The directory to put models, tensorboard data, and training results (default is logs/).
-  - `--max-steps-episode`: The option to limit the number of steps per epsiodes (default is 10000).
 
   Run the following command to train our DQN agent with a quick training session (if you started Envision in the previous section, refresh your browser to observe the training):
   ```sh
@@ -81,11 +81,11 @@ After training your agent, your models should be saved under `logs/<timestamped_
   - `--policy`: A string tag on the evaluation experiment directory (default is TD3).
   - `--models`: The path to the saved model (default is models/).
   - `--episodes`: The number of evaluation episodes (default is 200).
+  - `--max-episode-steps`: The option to limit the number of steps per episode (default is 200).
   - `--timestep`: The environment timestep in seconds (default is 0.1).
   - `--headless`: Provide this flag to run evaluation without Envision.
   - `--experiment-dir`: The path to the spec file that includes adapters and policy parameters.
   - `--policy`: The policy (agent) to evaluate (default is sac).
-  - `--max-steps-episode`: The option to limit the number of steps per epsiodes (default is 10000).
 
   For example, let's re-run our DQN's evaluation with the following command:
   ```sh

diff --git a/ultra/docs/rllib.md b/ultra/docs/rllib.md
@@ -25,18 +25,22 @@ of the steps is shown below
   - `--task`: The task number to run (default is 1).
   - `--level`: The level of the task (default is easy).
   - `--episodes`: The number of training episodes to run (default is 100).
+  - `--max-episode-steps`: The option to limit the number of steps per episode (default is 200).
+  - `--rollout-fragment-length`: The number of steps rolled out before training (default is 200).
+  - `--episodes`: The number of training episodes to run (default is 100).
   - `--timestep`: The environment timestep in seconds (default is 0.1).
   - `--headless`: Provide this flag to run training without Envision.
   - `--eval-episodes`: The number of evaluation episodes (default is 200).
-  - `--eval-rate`: The rate at which evaluation occurs based on the number of episodes (default is 10000).
+  - `--eval-rate`: The number of training episodes to wait before running the evaluation (default is 200).
   - `--seed`: The environment seed (default is 2).
   - `--policy`: The policy (agent) to train (default is ppo). Only PPO is supported for now.
   - `--log-dir`: The directory to put models, tensorboard data, and training results (default is logs/).
-  - `--training-batch-samples` : The number of trainig samples per iteration (default is 4000).
+  - `--train-batch-size` : The number of training samples per epoch (default is 4000).
+  - `--sgd-minibatch-size` : The training samples per gradient descent step (default is 128).
 
   An example showing how to run rllib training:
   ```sh
-  $ python ultra/rllib_train.py --task 1 --level easy --models logs/<timestamped_experiment_name>/models/ --episodes 5 --max-samples 200
+  $ python ultra/rllib_train.py --task 1 --level easy --models logs/<timestamped_experiment_name>/models/ --episodes 5 --train-batch-size 200
   ```
   > This will produce another experiment directory under `logs/` containing the results of the training/testing
   

diff --git a/ultra/tests/test_analysis.py b/ultra/tests/test_analysis.py
@@ -74,10 +74,14 @@ def test_interface_analyze(self):
         if os.path.exists(save_dir):
             self.assertTrue(True)
             shutil.rmtree(save_dir)
+        else:
+            self.assertTrue(False)
 
         if os.path.exists(output):
             self.assertTrue(True)
             shutil.rmtree(output)
+        else:
+            self.assertTrue(False)
 
     def test_analyze_scenario(self):
         save_dir = os.path.join(AnalysisTest.OUTPUT_DIRECTORY, "scenarios/")
@@ -133,10 +137,14 @@ def test_analyze_scenario(self):
         if os.path.exists(save_dir):
             self.assertTrue(True)
             shutil.rmtree(save_dir)
+        else:
+            self.assertTrue(False)
 
         if os.path.exists(output):
             self.assertTrue(True)
             shutil.rmtree(output)
+        else:
+            self.assertTrue(False)
 
     def test_save_histogram(self):
         try:

diff --git a/ultra/tests/test_episode.py b/ultra/tests/test_episode.py
@@ -120,7 +120,6 @@ def test_episode_counter(self):
         @ray.remote(max_calls=1, num_gpus=0, num_cpus=1)
         def run_experiment():
             agent, env = prepare_test_env_agent()
-            episode_count = 0
             log_dir = os.path.join(EpisodeTest.OUTPUT_DIRECTORY, "logs/")
             for episode in episodes(2, etag="Train", log_dir=log_dir):
                 observations = env.reset()
@@ -150,9 +149,8 @@ def run_experiment():
                     )
                     state = next_state
                     total_step += 1
-                episode_count += 1
             env.close()
-            return episode_count
+            return episode.index
 
         ray.init(ignore_reinit_error=True)
         episode_count = ray.get(run_experiment.remote())

diff --git a/ultra/tests/test_evaluate.py b/ultra/tests/test_evaluate.py
@@ -24,7 +24,7 @@
 import pickle
 import re
 import shutil
-import sys
+import time
 import unittest
 
 import dill
@@ -34,7 +34,7 @@
 from smarts.core.controllers import ActionSpaceType
 from ultra.baselines.agent_spec import BaselineAgentSpec
 from ultra.baselines.sac.sac.policy import SACPolicy
-from ultra.evaluate import evaluate, evaluation_check
+from ultra.evaluate import evaluate, evaluation_check, collect_evaluations
 from ultra.utils.episode import episodes
 
 seed = 2
@@ -378,6 +378,7 @@ def run_experiment(scenario_info, num_agents, log_dir, headless=True):
 
     total_step = 0
     etag = ":".join([policy_class.split(":")[-1] for policy_class in agent_classes])
+    evaluation_task_ids = dict()
 
     for episode in episodes(1, etag=etag, log_dir=log_dir):
         observations = env.reset()
@@ -410,10 +411,13 @@ def run_experiment(scenario_info, num_agents, log_dir, headless=True):
                 max_episode_steps=2,
                 policy_classes=agent_classes,
                 scenario_info=scenario_info,
+                evaluation_task_ids=evaluation_task_ids,
                 timestep_sec=0.1,
                 headless=True,
                 log_dir=log_dir,
             )
+            collect_evaluations(evaluation_task_ids=evaluation_task_ids)
+
             actions = {
                 agent_id: agents[agent_id].act(observation, explore=True)
                 for agent_id, observation in observations.items()
@@ -444,4 +448,8 @@ def run_experiment(scenario_info, num_agents, log_dir, headless=True):
             total_step += 1
             observations = next_observations
 
+    # Wait on the remaining evaluations to finish.
+    while collect_evaluations(evaluation_task_ids):
+        time.sleep(0.1)
+
     env.close()
diff --git a/ultra/tests/test_rllib_train.py b/ultra/tests/test_rllib_train.py
@@ -42,15 +42,13 @@ def test_rllib_train_cli(self):
         log_dir = os.path.join(RLlibTrainTest.OUTPUT_DIRECTORY, "tests/rllib_results/")
         try:
             os.system(
-                f"python ultra/rllib_train.py --task 00 --level easy --episodes 1 --training-batch-size 200 --headless --log-dir {log_dir}"
+                f"python ultra/rllib_train.py --task 00 --level easy --episodes 1 --max-episode-steps 2 --eval-episodes 1 --train-batch-size 1 --sgd-minibatch-size 1 --rollout-fragment-length 1 --headless --log-dir {log_dir}"
             )
         except Exception as err:
             print(err)
             self.assertTrue(False)
 
-        if os.path.exists(log_dir):
-            self.assertTrue(True)
-        else:
+        if not os.path.exists(log_dir):
             self.assertTrue(False)
 
     def test_rllib_train_method(self):
@@ -61,6 +59,8 @@ def test_rllib_train_method(self):
             train(
                 task=("00", "easy"),
                 num_episodes=1,
+                max_episode_steps=2,
+                rollout_fragment_length=1,
                 policy="ppo",
                 eval_info={
                     "eval_rate": 2,
@@ -69,16 +69,15 @@ def test_rllib_train_method(self):
                 timestep_sec=0.1,
                 headless=True,
                 seed=2,
-                training_batch_size=200,
+                train_batch_size=1,
+                sgd_minibatch_size=1,
                 log_dir=log_dir,
             )
         except Exception as err:
             print(err)
             self.assertTrue(False)
 
-        if os.path.exists(log_dir):
-            self.assertTrue(True)
-        else:
+        if not os.path.exists(log_dir):
             self.assertTrue(False)
 
     @classmethod