diff --git a/BENCHMARK.md b/BENCHMARK.md index 4b04973aa..e0f484671 100644 --- a/BENCHMARK.md +++ b/BENCHMARK.md @@ -64,15 +64,15 @@ SLM Lab's benchmark includes environments from the following offerings: |||||||| |:---:|:---:|:---:|:---:|:---:|:---:|:---:| | Env. \ Alg. | DQN | DDQN+PER | A2C (GAE) | A2C (n-step) | PPO | SAC | -| Breakout
graph
| 80.88 | 182 | 377 | 398 | **443** | - | +| Breakout
graph
| 80.88 | 182 | 377 | 398 | **443** | 3.51* | | Pong
graph
| 18.48 | 20.5 | 19.31 | 19.56 | **20.58** | 19.87* | -| Seaquest
graph
| 1185 | **4405** | 1070 | 1684 | 1715 | - | -| Qbert
graph
| 5494 | 11426 | 12405 | **13590** | 13460 | 214* | +| Seaquest
graph
| 1185 | **4405** | 1070 | 1684 | 1715 | 171* | +| Qbert
graph
| 5494 | 11426 | 12405 | **13590** | 13460 | 923* | | LunarLander
graph
| 192 | 233 | 25.21 | 68.23 | 214 | **276** | -| UnityHallway
graph
| -0.32 | 0.27 | 0.08 | -0.96 | **0.73** | - | -| UnityPushBlock
graph
| 4.88 | 4.93 | 4.68 | 4.93 | **4.97** | - | +| UnityHallway
graph
| -0.32 | 0.27 | 0.08 | -0.96 | **0.73** | 0.01 | +| UnityPushBlock
graph
| 4.88 | 4.93 | 4.68 | 4.93 | **4.97** | -0.70 | ->Episode score at the end of training attained by SLM Lab implementations on discrete-action control problems. Reported episode scores are the average over the last 100 checkpoints, and then averaged over 4 Sessions. Results marked with `*` were trained using the hybrid synchronous/asynchronous version of SAC to parallelize and speed up training time. +>Episode score at the end of training attained by SLM Lab implementations on discrete-action control problems. Reported episode scores are the average over the last 100 checkpoints, and then averaged over 4 Sessions. A Random baseline with score averaged over 100 episodes is included. Results marked with `*` were trained using the hybrid synchronous/asynchronous version of SAC to parallelize and speed up training time. For SAC, Breakout, Pong and Seaquest were trained for 2M frames instead of 10M frames. >For the full Atari benchmark, see [Atari Benchmark](https://github.com/kengz/SLM-Lab/blob/benchmark/BENCHMARK.md#atari-benchmark) diff --git a/README.md b/README.md index be637756a..e6b79bba1 100644 --- a/README.md +++ b/README.md @@ -54,13 +54,13 @@ Due to their standardized design, all the algorithms can be parallelized asynchr |||||||| |:---:|:---:|:---:|:---:|:---:|:---:|:---:| | Env. \ Alg. | DQN | DDQN+PER | A2C (GAE) | A2C (n-step) | PPO | SAC | -| Breakout
graph
| 80.88 | 182 | 377 | 398 | **443** | - | +| Breakout
graph
| 80.88 | 182 | 377 | 398 | **443** | 3.51* | | Pong
graph
| 18.48 | 20.5 | 19.31 | 19.56 | **20.58** | 19.87* | -| Seaquest
graph
| 1185 | **4405** | 1070 | 1684 | 1715 | - | -| Qbert
graph
| 5494 | 11426 | 12405 | **13590** | 13460 | 214* | +| Seaquest
graph
| 1185 | **4405** | 1070 | 1684 | 1715 | 171* | +| Qbert
graph
| 5494 | 11426 | 12405 | **13590** | 13460 | 923* | | LunarLander
graph
| 192 | 233 | 25.21 | 68.23 | 214 | **276** | -| UnityHallway
graph
| -0.32 | 0.27 | 0.08 | -0.96 | **0.73** | - | -| UnityPushBlock
graph
| 4.88 | 4.93 | 4.68 | 4.93 | **4.97** | - | +| UnityHallway
graph
| -0.32 | 0.27 | 0.08 | -0.96 | **0.73** | 0.01 | +| UnityPushBlock
graph
| 4.88 | 4.93 | 4.68 | 4.93 | **4.97** | -0.70 | >For the full Atari benchmark, see [Atari Benchmark](https://github.com/kengz/SLM-Lab/blob/benchmark/BENCHMARK.md#atari-benchmark) diff --git a/bin/plot_benchmark.py b/bin/plot_benchmark.py index 558791fd4..11474e141 100644 --- a/bin/plot_benchmark.py +++ b/bin/plot_benchmark.py @@ -17,8 +17,10 @@ trial_metrics_path = '*t0_trial_metrics.pkl' env_name_map = { 'lunar': 'LunarLander', + 'reakout': 'Breakout', 'ong': 'Pong', 'bert': 'Qbert', + 'eaquest': 'Seaquest', 'humanoid': 'RoboschoolHumanoid', 'humanoidflagrun': 'RoboschoolHumanoidFlagrun', 'humanoidflagrunharder': 'RoboschoolHumanoidFlagrunHarder', @@ -160,9 +162,9 @@ def plot_envs(algos, envs, data_folder, legend_list, frame_scales=None): 'SAC', ] envs = [ - 'Breakout', + 'reakout', 'ong', - 'Seaquest', + 'eaquest', 'bert', 'lunar', 'UnityHallway', @@ -177,8 +179,8 @@ def plot_envs(algos, envs, data_folder, legend_list, frame_scales=None): # plot normal envs = [ - 'Breakout', - 'Seaquest', + # 'Breakout', + # 'Seaquest', 'lunar', 'UnityHallway', 'UnityPushBlock', @@ -187,11 +189,17 @@ def plot_envs(algos, envs, data_folder, legend_list, frame_scales=None): -# Replot Pong and Qbert for Async SAC +# Replot Breakout, Pong, Seaquest and Qbert for Async SAC envs = [ + 'reakout', 'ong', - 'bert', + 'eaquest', ] plot_envs(algos, envs, data_folder, legend_list, frame_scales=[(-1, 6)]) +envs = [ + 'bert', +] +plot_envs(algos, envs, data_folder, legend_list, frame_scales=[(-1, 8)]) + # Continuous # Roboschool + Unity diff --git a/slm_lab/spec/benchmark/async_sac/async_sac_atari.json b/slm_lab/spec/benchmark/async_sac/async_sac_atari.json index 82f078607..ce33484f4 100644 --- a/slm_lab/spec/benchmark/async_sac/async_sac_atari.json +++ b/slm_lab/spec/benchmark/async_sac/async_sac_atari.json @@ -49,6 +49,7 @@ "name": "BreakoutNoFrameskip-v4", "frame_op": "concat", "frame_op_len": 4, + "image_downsize": [64, 64], "reward_scale": "sign", "num_envs": 4, "max_t": null, @@ -60,8 +61,8 @@ }, "meta": { "distributed": "shared", - 
"log_frequency": 500, - "eval_frequency": 500, + "log_frequency": 1000, + "eval_frequency": 1000, "rigorous_eval": 0, "max_session": 6, "max_trial": 1, @@ -117,6 +118,7 @@ "name": "PongNoFrameskip-v4", "frame_op": "concat", "frame_op_len": 4, + "image_downsize": [64, 64], "reward_scale": "sign", "num_envs": 4, "max_t": null, @@ -128,8 +130,8 @@ }, "meta": { "distributed": "shared", - "log_frequency": 500, - "eval_frequency": 500, + "log_frequency": 1000, + "eval_frequency": 1000, "rigorous_eval": 0, "max_session": 6, "max_trial": 1, @@ -185,6 +187,7 @@ "name": "QbertNoFrameskip-v4", "frame_op": "concat", "frame_op_len": 4, + "image_downsize": [64, 64], "reward_scale": "sign", "num_envs": 4, "max_t": null, @@ -196,8 +199,8 @@ }, "meta": { "distributed": "shared", - "log_frequency": 500, - "eval_frequency": 500, + "log_frequency": 1000, + "eval_frequency": 1000, "rigorous_eval": 0, "max_session": 6, "max_trial": 1, @@ -253,6 +256,7 @@ "name": "SeaquestNoFrameskip-v4", "frame_op": "concat", "frame_op_len": 4, + "image_downsize": [64, 64], "reward_scale": "sign", "num_envs": 4, "max_t": null, @@ -264,8 +268,8 @@ }, "meta": { "distributed": "shared", - "log_frequency": 500, - "eval_frequency": 500, + "log_frequency": 1000, + "eval_frequency": 1000, "rigorous_eval": 0, "max_session": 6, "max_trial": 1, diff --git a/slm_lab/spec/benchmark/async_sac/async_sac_qbert.json b/slm_lab/spec/benchmark/async_sac/async_sac_qbert.json index 75ed37563..448781def 100644 --- a/slm_lab/spec/benchmark/async_sac/async_sac_qbert.json +++ b/slm_lab/spec/benchmark/async_sac/async_sac_qbert.json @@ -12,7 +12,7 @@ }, "memory": { "name": "Replay", - "batch_size": 512, + "batch_size": 256, "max_size": 200000, "use_cer": false }, @@ -53,7 +53,7 @@ "reward_scale": "sign", "num_envs": 4, "max_t": null, - "max_frame": 5e6 + "max_frame": 2e6 }], "body": { "product": "outer", @@ -64,7 +64,7 @@ "log_frequency": 1000, "eval_frequency": 1000, "rigorous_eval": 0, - "max_session": 6, + "max_session": 4, 
"max_trial": 1, } }, @@ -119,7 +119,7 @@ "frame_op": "concat", "frame_op_len": 4, "image_downsize": [64, 64], - "reward_scale": "sign", + "reward_scale":null, "num_envs": 4, "max_t": null, "max_frame": 1e7 @@ -133,7 +133,7 @@ "log_frequency": 1000, "eval_frequency": 1000, "rigorous_eval": 0, - "max_session": 6, + "max_session": 4, "max_trial": 1, } }, diff --git a/slm_lab/spec/benchmark/sac/sac_unity.json b/slm_lab/spec/benchmark/sac/sac_unity.json index e147f7241..032f96ece 100644 --- a/slm_lab/spec/benchmark/sac/sac_unity.json +++ b/slm_lab/spec/benchmark/sac/sac_unity.json @@ -12,22 +12,21 @@ "memory": { "name": "Replay", "batch_size": 256, - "max_size": 200000, + "max_size": 100000, "use_cer": false }, "net": { "type": "MLPNet", - "hid_layers": [256, 256], - "hid_layers_activation": "relu", + "hid_layers": [64, 64, 32], + "hid_layers_activation": "leakyrelu", "init_fn": "orthogonal_", "clip_grad_val": 0.5, "loss_spec": { "name": "MSELoss" }, "optim_spec": { - "name": "Lookahead", - "optimizer": "RAdam", - "lr": 3e-3, + "name": "RAdam", + "lr": 3e-4, }, "lr_scheduler_spec": null, "update_type": "polyak",