diff --git a/BENCHMARK.md b/BENCHMARK.md
index 4b04973aa..e0f484671 100644
--- a/BENCHMARK.md
+++ b/BENCHMARK.md
@@ -64,15 +64,15 @@ SLM Lab's benchmark includes environments from the following offerings:
||||||||
|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
| Env. \ Alg. | DQN | DDQN+PER | A2C (GAE) | A2C (n-step) | PPO | SAC |
-| Breakout graph
![](https://user-images.githubusercontent.com/8209263/67737546-dabb6380-f9c8-11e9-901e-b96cc28f1fdf.png)
| 80.88 | 182 | 377 | 398 | **443** | - |
+| Breakout graph
![](https://user-images.githubusercontent.com/8209263/67737546-dabb6380-f9c8-11e9-901e-b96cc28f1fdf.png)
| 80.88 | 182 | 377 | 398 | **443** | 3.51* |
| Pong graph
![](https://user-images.githubusercontent.com/8209263/67737554-e018ae00-f9c8-11e9-92b5-3bd8d213b1e0.png)
| 18.48 | 20.5 | 19.31 | 19.56 | **20.58** | 19.87* |
-| Seaquest graph
![](https://user-images.githubusercontent.com/8209263/67737557-e3139e80-f9c8-11e9-9446-119593ca956b.png)
| 1185 | **4405** | 1070 | 1684 | 1715 | - |
-| Qbert graph
![](https://user-images.githubusercontent.com/8209263/67737559-e575f880-f9c8-11e9-8c98-f14c82041a45.png)
| 5494 | 11426 | 12405 | **13590** | 13460 | 214* |
+| Seaquest graph
![](https://user-images.githubusercontent.com/8209263/67737557-e3139e80-f9c8-11e9-9446-119593ca956b.png)
| 1185 | **4405** | 1070 | 1684 | 1715 | 171* |
+| Qbert graph
![](https://user-images.githubusercontent.com/8209263/67737559-e575f880-f9c8-11e9-8c98-f14c82041a45.png)
| 5494 | 11426 | 12405 | **13590** | 13460 | 923* |
| LunarLander graph
![](https://user-images.githubusercontent.com/8209263/67737566-e7d85280-f9c8-11e9-8df8-39c1205c5308.png)
| 192 | 233 | 25.21 | 68.23 | 214 | **276** |
-| UnityHallway graph
![](https://user-images.githubusercontent.com/8209263/67737569-ead34300-f9c8-11e9-9e26-61fe1d779989.png)
| -0.32 | 0.27 | 0.08 | -0.96 | **0.73** | - |
-| UnityPushBlock graph
![](https://user-images.githubusercontent.com/8209263/67737577-eeff6080-f9c8-11e9-931c-843ba697779c.png)
| 4.88 | 4.93 | 4.68 | 4.93 | **4.97** | - |
+| UnityHallway graph
![](https://user-images.githubusercontent.com/8209263/67737569-ead34300-f9c8-11e9-9e26-61fe1d779989.png)
| -0.32 | 0.27 | 0.08 | -0.96 | **0.73** | 0.01 |
+| UnityPushBlock graph
![](https://user-images.githubusercontent.com/8209263/67737577-eeff6080-f9c8-11e9-931c-843ba697779c.png)
| 4.88 | 4.93 | 4.68 | 4.93 | **4.97** | -0.70 |
->Episode score at the end of training attained by SLM Lab implementations on discrete-action control problems. Reported episode scores are the average over the last 100 checkpoints, and then averaged over 4 Sessions. Results marked with `*` were trained using the hybrid synchronous/asynchronous version of SAC to parallelize and speed up training time.
+>Episode scores at the end of training attained by SLM Lab implementations on discrete-action control problems. Each reported score is the average over the last 100 checkpoints, further averaged over 4 Sessions. A Random baseline, with its score averaged over 100 episodes, is included. Results marked with `*` were trained using the hybrid synchronous/asynchronous version of SAC to parallelize and speed up training. With SAC, the Breakout, Pong and Seaquest environments were trained for 2M frames instead of 10M frames.
>For the full Atari benchmark, see [Atari Benchmark](https://github.com/kengz/SLM-Lab/blob/benchmark/BENCHMARK.md#atari-benchmark)
diff --git a/README.md b/README.md
index be637756a..e6b79bba1 100644
--- a/README.md
+++ b/README.md
@@ -54,13 +54,13 @@ Due to their standardized design, all the algorithms can be parallelized asynchr
||||||||
|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
| Env. \ Alg. | DQN | DDQN+PER | A2C (GAE) | A2C (n-step) | PPO | SAC |
-| Breakout graph
![](https://user-images.githubusercontent.com/8209263/67737546-dabb6380-f9c8-11e9-901e-b96cc28f1fdf.png)
| 80.88 | 182 | 377 | 398 | **443** | - |
+| Breakout graph
![](https://user-images.githubusercontent.com/8209263/67737546-dabb6380-f9c8-11e9-901e-b96cc28f1fdf.png)
| 80.88 | 182 | 377 | 398 | **443** | 3.51* |
| Pong graph
![](https://user-images.githubusercontent.com/8209263/67737554-e018ae00-f9c8-11e9-92b5-3bd8d213b1e0.png)
| 18.48 | 20.5 | 19.31 | 19.56 | **20.58** | 19.87* |
-| Seaquest graph
![](https://user-images.githubusercontent.com/8209263/67737557-e3139e80-f9c8-11e9-9446-119593ca956b.png)
| 1185 | **4405** | 1070 | 1684 | 1715 | - |
-| Qbert graph
![](https://user-images.githubusercontent.com/8209263/67737559-e575f880-f9c8-11e9-8c98-f14c82041a45.png)
| 5494 | 11426 | 12405 | **13590** | 13460 | 214* |
+| Seaquest graph
![](https://user-images.githubusercontent.com/8209263/67737557-e3139e80-f9c8-11e9-9446-119593ca956b.png)
| 1185 | **4405** | 1070 | 1684 | 1715 | 171* |
+| Qbert graph
![](https://user-images.githubusercontent.com/8209263/67737559-e575f880-f9c8-11e9-8c98-f14c82041a45.png)
| 5494 | 11426 | 12405 | **13590** | 13460 | 923* |
| LunarLander graph
![](https://user-images.githubusercontent.com/8209263/67737566-e7d85280-f9c8-11e9-8df8-39c1205c5308.png)
| 192 | 233 | 25.21 | 68.23 | 214 | **276** |
-| UnityHallway graph
![](https://user-images.githubusercontent.com/8209263/67737569-ead34300-f9c8-11e9-9e26-61fe1d779989.png)
| -0.32 | 0.27 | 0.08 | -0.96 | **0.73** | - |
-| UnityPushBlock graph
![](https://user-images.githubusercontent.com/8209263/67737577-eeff6080-f9c8-11e9-931c-843ba697779c.png)
| 4.88 | 4.93 | 4.68 | 4.93 | **4.97** | - |
+| UnityHallway graph
![](https://user-images.githubusercontent.com/8209263/67737569-ead34300-f9c8-11e9-9e26-61fe1d779989.png)
| -0.32 | 0.27 | 0.08 | -0.96 | **0.73** | 0.01 |
+| UnityPushBlock graph
![](https://user-images.githubusercontent.com/8209263/67737577-eeff6080-f9c8-11e9-931c-843ba697779c.png)
| 4.88 | 4.93 | 4.68 | 4.93 | **4.97** | -0.70 |
>For the full Atari benchmark, see [Atari Benchmark](https://github.com/kengz/SLM-Lab/blob/benchmark/BENCHMARK.md#atari-benchmark)
diff --git a/bin/plot_benchmark.py b/bin/plot_benchmark.py
index 558791fd4..11474e141 100644
--- a/bin/plot_benchmark.py
+++ b/bin/plot_benchmark.py
@@ -17,8 +17,10 @@
trial_metrics_path = '*t0_trial_metrics.pkl'
env_name_map = {
'lunar': 'LunarLander',
+ 'reakout': 'Breakout',
'ong': 'Pong',
'bert': 'Qbert',
+ 'eaquest': 'Seaquest',
'humanoid': 'RoboschoolHumanoid',
'humanoidflagrun': 'RoboschoolHumanoidFlagrun',
'humanoidflagrunharder': 'RoboschoolHumanoidFlagrunHarder',
@@ -160,9 +162,9 @@ def plot_envs(algos, envs, data_folder, legend_list, frame_scales=None):
'SAC',
]
envs = [
- 'Breakout',
+ 'reakout',
'ong',
- 'Seaquest',
+ 'eaquest',
'bert',
'lunar',
'UnityHallway',
@@ -177,8 +179,8 @@ def plot_envs(algos, envs, data_folder, legend_list, frame_scales=None):
# plot normal
envs = [
- 'Breakout',
- 'Seaquest',
+ # 'Breakout',
+ # 'Seaquest',
'lunar',
'UnityHallway',
'UnityPushBlock',
@@ -187,11 +189,17 @@ def plot_envs(algos, envs, data_folder, legend_list, frame_scales=None):
-# Replot Pong and Qbert for Async SAC
+# Replot Breakout, Pong and Seaquest for Async SAC (Qbert is replotted separately below)
envs = [
+ 'reakout',
'ong',
- 'bert',
+ 'eaquest',
]
plot_envs(algos, envs, data_folder, legend_list, frame_scales=[(-1, 6)])
+envs = [
+ 'bert',
+]
+plot_envs(algos, envs, data_folder, legend_list, frame_scales=[(-1, 8)])
+
# Continuous
# Roboschool + Unity
diff --git a/slm_lab/spec/benchmark/async_sac/async_sac_atari.json b/slm_lab/spec/benchmark/async_sac/async_sac_atari.json
index 82f078607..ce33484f4 100644
--- a/slm_lab/spec/benchmark/async_sac/async_sac_atari.json
+++ b/slm_lab/spec/benchmark/async_sac/async_sac_atari.json
@@ -49,6 +49,7 @@
"name": "BreakoutNoFrameskip-v4",
"frame_op": "concat",
"frame_op_len": 4,
+ "image_downsize": [64, 64],
"reward_scale": "sign",
"num_envs": 4,
"max_t": null,
@@ -60,8 +61,8 @@
},
"meta": {
"distributed": "shared",
- "log_frequency": 500,
- "eval_frequency": 500,
+ "log_frequency": 1000,
+ "eval_frequency": 1000,
"rigorous_eval": 0,
"max_session": 6,
"max_trial": 1,
@@ -117,6 +118,7 @@
"name": "PongNoFrameskip-v4",
"frame_op": "concat",
"frame_op_len": 4,
+ "image_downsize": [64, 64],
"reward_scale": "sign",
"num_envs": 4,
"max_t": null,
@@ -128,8 +130,8 @@
},
"meta": {
"distributed": "shared",
- "log_frequency": 500,
- "eval_frequency": 500,
+ "log_frequency": 1000,
+ "eval_frequency": 1000,
"rigorous_eval": 0,
"max_session": 6,
"max_trial": 1,
@@ -185,6 +187,7 @@
"name": "QbertNoFrameskip-v4",
"frame_op": "concat",
"frame_op_len": 4,
+ "image_downsize": [64, 64],
"reward_scale": "sign",
"num_envs": 4,
"max_t": null,
@@ -196,8 +199,8 @@
},
"meta": {
"distributed": "shared",
- "log_frequency": 500,
- "eval_frequency": 500,
+ "log_frequency": 1000,
+ "eval_frequency": 1000,
"rigorous_eval": 0,
"max_session": 6,
"max_trial": 1,
@@ -253,6 +256,7 @@
"name": "SeaquestNoFrameskip-v4",
"frame_op": "concat",
"frame_op_len": 4,
+ "image_downsize": [64, 64],
"reward_scale": "sign",
"num_envs": 4,
"max_t": null,
@@ -264,8 +268,8 @@
},
"meta": {
"distributed": "shared",
- "log_frequency": 500,
- "eval_frequency": 500,
+ "log_frequency": 1000,
+ "eval_frequency": 1000,
"rigorous_eval": 0,
"max_session": 6,
"max_trial": 1,
diff --git a/slm_lab/spec/benchmark/async_sac/async_sac_qbert.json b/slm_lab/spec/benchmark/async_sac/async_sac_qbert.json
index 75ed37563..448781def 100644
--- a/slm_lab/spec/benchmark/async_sac/async_sac_qbert.json
+++ b/slm_lab/spec/benchmark/async_sac/async_sac_qbert.json
@@ -12,7 +12,7 @@
},
"memory": {
"name": "Replay",
- "batch_size": 512,
+ "batch_size": 256,
"max_size": 200000,
"use_cer": false
},
@@ -53,7 +53,7 @@
"reward_scale": "sign",
"num_envs": 4,
"max_t": null,
- "max_frame": 5e6
+ "max_frame": 2e6
}],
"body": {
"product": "outer",
@@ -64,7 +64,7 @@
"log_frequency": 1000,
"eval_frequency": 1000,
"rigorous_eval": 0,
- "max_session": 6,
+ "max_session": 4,
"max_trial": 1,
}
},
@@ -119,7 +119,7 @@
"frame_op": "concat",
"frame_op_len": 4,
"image_downsize": [64, 64],
- "reward_scale": "sign",
+ "reward_scale":null,
"num_envs": 4,
"max_t": null,
"max_frame": 1e7
@@ -133,7 +133,7 @@
"log_frequency": 1000,
"eval_frequency": 1000,
"rigorous_eval": 0,
- "max_session": 6,
+ "max_session": 4,
"max_trial": 1,
}
},
diff --git a/slm_lab/spec/benchmark/sac/sac_unity.json b/slm_lab/spec/benchmark/sac/sac_unity.json
index e147f7241..032f96ece 100644
--- a/slm_lab/spec/benchmark/sac/sac_unity.json
+++ b/slm_lab/spec/benchmark/sac/sac_unity.json
@@ -12,22 +12,21 @@
"memory": {
"name": "Replay",
"batch_size": 256,
- "max_size": 200000,
+ "max_size": 100000,
"use_cer": false
},
"net": {
"type": "MLPNet",
- "hid_layers": [256, 256],
- "hid_layers_activation": "relu",
+ "hid_layers": [64, 64, 32],
+ "hid_layers_activation": "leakyrelu",
"init_fn": "orthogonal_",
"clip_grad_val": 0.5,
"loss_spec": {
"name": "MSELoss"
},
"optim_spec": {
- "name": "Lookahead",
- "optimizer": "RAdam",
- "lr": 3e-3,
+ "name": "RAdam",
+ "lr": 3e-4,
},
"lr_scheduler_spec": null,
"update_type": "polyak",