-
Notifications
You must be signed in to change notification settings - Fork 9
/
rlunplugged_policies.json
1 lines (1 loc) · 510 KB
/
rlunplugged_policies.json
1
[{"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 29.358801241, "return_std": 0.1588983081, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "13830900_1/step_000000025001", "id": "13830900_1"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 789.7786076513, "return_std": 1.9476305972999999, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "13830900_1/step_000000050001", "id": "13830900_1"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 857.0629866011, "return_std": 0.8659249989000001, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "13830900_1/step_000000100001", "id": "13830900_1"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 740.8658119867, "return_std": 5.9190549566, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "13830900_1/step_000000200001", "id": "13830900_1"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 865.5244789528, "return_std": 1.0654499951, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "13830900_1/step_000000400001", "id": "13830900_1"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 840.5637882514, "return_std": 1.2870909025000001, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "13830900_1/step_000000800001", "id": "13830900_1"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 584.0747589835, "return_std": 100.6448472654, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "13830900_1/step_000001600001", "id": "13830900_1"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 426.5868356541, "return_std": 138.9445979329, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "13830900_1/step_000002500001", "id": "13830900_1"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 46.9911688345, "return_std": 3.0969011924, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "13830900_2/step_000000025001", "id": "13830900_2"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 700.6273296895, "return_std": 0.48730361800000005, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "13830900_2/step_000000050001", "id": "13830900_2"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 869.2077330783, "return_std": 1.0849584292, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "13830900_2/step_000000100001", "id": "13830900_2"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 837.5244536429, "return_std": 26.1487099079, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "13830900_2/step_000000200001", "id": "13830900_2"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 829.6104607582, "return_std": 0.2026372162, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "13830900_2/step_000000400001", "id": "13830900_2"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 758.8651426728, "return_std": 0.9903374836000001, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "13830900_2/step_000000800001", "id": "13830900_2"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 625.2381488269, "return_std": 118.1495121589, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "13830900_2/step_000001600001", "id": "13830900_2"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 530.8880646286, "return_std": 8.1574099955, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "13830900_2/step_000002500001", "id": "13830900_2"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 2.3973899364, "return_std": 0.1705125063, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "13830900_3/step_000000025001", "id": "13830900_3"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 96.4439946448, "return_std": 2.4285506733, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "13830900_3/step_000000050001", "id": "13830900_3"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 825.7919277597, "return_std": 1.8163647206, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "13830900_3/step_000000100001", "id": "13830900_3"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 831.261074331, "return_std": 0.9505169081, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "13830900_3/step_000000200001", "id": "13830900_3"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 772.4658012206, "return_std": 0.3505962016, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "13830900_3/step_000000400001", "id": "13830900_3"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 752.7579246199, "return_std": 54.4380260638, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "13830900_3/step_000000800001", "id": "13830900_3"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 759.0739775945, "return_std": 8.2382099278, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "13830900_3/step_000001600001", "id": "13830900_3"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 648.8148390814, "return_std": 11.1406384725, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "13830900_3/step_000002500001", "id": "13830900_3"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 58.6772640661, "return_std": 26.1340095653, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "cheetah_run", "policy_path": "13830900_4/step_000000025001", "id": "13830900_4"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 66.8142379583, "return_std": 25.3350673359, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "cheetah_run", "policy_path": "13830900_4/step_000000050001", "id": "13830900_4"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 100.5185047984, "return_std": 50.9557649578, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "cheetah_run", "policy_path": "13830900_4/step_000000100001", "id": "13830900_4"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 126.70416824509999, "return_std": 48.8967415599, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "cheetah_run", "policy_path": "13830900_4/step_000000200001", "id": "13830900_4"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 225.5697265943, "return_std": 93.8134750477, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "cheetah_run", "policy_path": "13830900_4/step_000000400001", "id": "13830900_4"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 282.2810552668, "return_std": 81.2381792699, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "cheetah_run", "policy_path": "13830900_4/step_000000800001", "id": "13830900_4"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 337.3791798137, "return_std": 171.0038475527, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "cheetah_run", "policy_path": "13830900_4/step_000001600001", "id": "13830900_4"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 279.7473238259, "return_std": 116.3526143289, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "cheetah_run", "policy_path": "13830900_4/step_000002500001", "id": "13830900_4"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 77.0059226497, "return_std": 33.6914927294, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "cheetah_run", "policy_path": "13830900_5/step_000000025001", "id": "13830900_5"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 76.6546328154, "return_std": 26.872311025, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "cheetah_run", "policy_path": "13830900_5/step_000000050001", "id": "13830900_5"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 100.7273995631, "return_std": 36.6196594271, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "cheetah_run", "policy_path": "13830900_5/step_000000100001", "id": "13830900_5"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 117.2751450766, "return_std": 62.3181634523, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "cheetah_run", "policy_path": "13830900_5/step_000000200001", "id": "13830900_5"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 188.9186595616, "return_std": 113.4777969568, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "cheetah_run", "policy_path": "13830900_5/step_000000400001", "id": "13830900_5"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 261.6066785913, "return_std": 84.1232066047, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "cheetah_run", "policy_path": "13830900_5/step_000000800001", "id": "13830900_5"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 319.273084453, "return_std": 127.1164303779, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "cheetah_run", "policy_path": "13830900_5/step_000001600001", "id": "13830900_5"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 194.5604448479, "return_std": 99.1907964563, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "cheetah_run", "policy_path": "13830900_5/step_000002500001", "id": "13830900_5"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 57.0002755003, "return_std": 39.3198377801, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "cheetah_run", "policy_path": "13830900_6/step_000000025001", "id": "13830900_6"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 90.3957113604, "return_std": 31.8479810086, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "cheetah_run", "policy_path": "13830900_6/step_000000050001", "id": "13830900_6"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 48.7943077049, "return_std": 12.650244558, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "cheetah_run", "policy_path": "13830900_6/step_000000100001", "id": "13830900_6"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 183.9888033514, "return_std": 69.3755688788, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "cheetah_run", "policy_path": "13830900_6/step_000000200001", "id": "13830900_6"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 142.8899019935, "return_std": 81.3732870146, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "cheetah_run", "policy_path": "13830900_6/step_000000400001", "id": "13830900_6"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 320.1182779706, "return_std": 116.6544895566, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "cheetah_run", "policy_path": "13830900_6/step_000000800001", "id": "13830900_6"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 230.2869690008, "return_std": 105.4926767991, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "cheetah_run", "policy_path": "13830900_6/step_000001600001", "id": "13830900_6"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 291.1212694833, "return_std": 119.2699292668, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "cheetah_run", "policy_path": "13830900_6/step_000002500001", "id": "13830900_6"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 134.6966632963, "return_std": 329.6511114226, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "13830900_7/step_000000025001", "id": "13830900_7"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 213.0869565217, "return_std": 394.6236824248, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "13830900_7/step_000000050001", "id": "13830900_7"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 345.3282520325, "return_std": 442.1229293002, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "13830900_7/step_000000100001", "id": "13830900_7"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 540.8779247202, "return_std": 444.3459315384, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "13830900_7/step_000000200001", "id": "13830900_7"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 797.7884615385, "return_std": 343.5706994495, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "13830900_7/step_000000400001", "id": "13830900_7"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 679.0556680162, "return_std": 410.5368170509, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "13830900_7/step_000000800001", "id": "13830900_7"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 713.9094608342, "return_std": 414.3945709463, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "13830900_7/step_000001600001", "id": "13830900_7"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 646.755351682, "return_std": 431.2789798419, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "13830900_7/step_000002500001", "id": "13830900_7"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 122.4378159757, "return_std": 303.2974406014, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "13830900_8/step_000000025001", "id": "13830900_8"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 297.1404040404, "return_std": 440.183969128, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "13830900_8/step_000000050001", "id": "13830900_8"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 335.0313131313, "return_std": 441.3666385617, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "13830900_8/step_000000100001", "id": "13830900_8"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 631.975, "return_std": 448.9009762314, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "13830900_8/step_000000200001", "id": "13830900_8"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 753.2941176471, "return_std": 380.7512273957, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "13830900_8/step_000000400001", "id": "13830900_8"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 744.530964467, "return_std": 358.5055607646, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "13830900_8/step_000000800001", "id": "13830900_8"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 650.6131979695, "return_std": 443.0207096919, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "13830900_8/step_000001600001", "id": "13830900_8"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 553.6161616162, "return_std": 435.0168786068, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "13830900_8/step_000002500001", "id": "13830900_8"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 101.4853090172, "return_std": 296.5822636035, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "13830900_9/step_000000025001", "id": "13830900_9"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 225.7748478702, "return_std": 402.3142015487, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "13830900_9/step_000000050001", "id": "13830900_9"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 305.2195121951, "return_std": 414.8273765987, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "13830900_9/step_000000100001", "id": "13830900_9"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 679.2153690597, "return_std": 418.150662858, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "13830900_9/step_000000200001", "id": "13830900_9"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 739.0455465587, "return_std": 378.0885334938, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "13830900_9/step_000000400001", "id": "13830900_9"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 724.6530194473, "return_std": 382.5386048417, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "13830900_9/step_000000800001", "id": "13830900_9"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 613.1233569262, "return_std": 452.2038822969, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "13830900_9/step_000001600001", "id": "13830900_9"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 754.0323232323, "return_std": 366.0269608408, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "13830900_9/step_000002500001", "id": "13830900_9"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 82.0729567983, "return_std": 100.437207374, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "fish_swim", "policy_path": "13830900_10/step_000000025001", "id": "13830900_10"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 81.4435825928, "return_std": 85.5577214145, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "fish_swim", "policy_path": "13830900_10/step_000000050001", "id": "13830900_10"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 229.6641359802, "return_std": 228.6393388309, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "fish_swim", "policy_path": "13830900_10/step_000000100001", "id": "13830900_10"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 296.7352533402, "return_std": 206.9007709313, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "fish_swim", "policy_path": "13830900_10/step_000000200001", "id": "13830900_10"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 210.028220379, "return_std": 208.494878452, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "fish_swim", "policy_path": "13830900_10/step_000000400001", "id": "13830900_10"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 175.4339916885, "return_std": 221.0624580337, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "fish_swim", "policy_path": "13830900_10/step_000000800001", "id": "13830900_10"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 177.1328238466, "return_std": 214.2250587881, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "fish_swim", "policy_path": "13830900_10/step_000001600001", "id": "13830900_10"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 111.9154255177, "return_std": 138.2701555851, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "fish_swim", "policy_path": "13830900_10/step_000002500001", "id": "13830900_10"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 73.2781439773, "return_std": 73.8179975842, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "fish_swim", "policy_path": "13830900_11/step_000000025001", "id": "13830900_11"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 129.8474779125, "return_std": 162.6454196108, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "fish_swim", "policy_path": "13830900_11/step_000000050001", "id": "13830900_11"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 340.0394189767, "return_std": 233.2333689929, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "fish_swim", "policy_path": "13830900_11/step_000000100001", "id": "13830900_11"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 344.236897981, "return_std": 237.0754187285, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "fish_swim", "policy_path": "13830900_11/step_000000200001", "id": "13830900_11"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 213.9158284037, "return_std": 206.2836986443, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "fish_swim", "policy_path": "13830900_11/step_000000400001", "id": "13830900_11"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 228.2588128187, "return_std": 232.5642574093, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "fish_swim", "policy_path": "13830900_11/step_000000800001", "id": "13830900_11"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 121.6373941388, "return_std": 156.1681684873, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "fish_swim", "policy_path": "13830900_11/step_000001600001", "id": "13830900_11"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 103.9839705628, "return_std": 128.2691061665, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "fish_swim", "policy_path": "13830900_11/step_000002500001", "id": "13830900_11"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 74.287986171, "return_std": 61.7711693311, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "fish_swim", "policy_path": "13830900_12/step_000000025001", "id": "13830900_12"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 107.3748726317, "return_std": 124.1113886712, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "fish_swim", "policy_path": "13830900_12/step_000000050001", "id": "13830900_12"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 153.769006481, "return_std": 135.8972591151, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "fish_swim", "policy_path": "13830900_12/step_000000100001", "id": "13830900_12"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 197.4015840452, "return_std": 189.176051199, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "fish_swim", "policy_path": "13830900_12/step_000000200001", "id": "13830900_12"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 224.480204908, "return_std": 205.5176504445, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "fish_swim", "policy_path": "13830900_12/step_000000400001", "id": "13830900_12"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 162.495297012, "return_std": 201.1879345902, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "fish_swim", "policy_path": "13830900_12/step_000000800001", "id": "13830900_12"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 105.4663197133, "return_std": 135.6897714939, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "fish_swim", "policy_path": "13830900_12/step_000001600001", "id": "13830900_12"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 94.0254175233, "return_std": 98.2690983855, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "fish_swim", "policy_path": "13830900_12/step_000002500001", "id": "13830900_12"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.8261570054, "return_std": 0.4545132556, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "humanoid_run", "policy_path": "13830900_13/step_000000025001", "id": "13830900_13"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.6913620213, "return_std": 0.39988094630000004, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "humanoid_run", "policy_path": "13830900_13/step_000000050001", "id": "13830900_13"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.8045572739, "return_std": 0.45958685050000003, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "humanoid_run", "policy_path": "13830900_13/step_000000100001", "id": "13830900_13"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 1.5213956382, "return_std": 1.4688541231, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "humanoid_run", "policy_path": "13830900_13/step_000000200001", "id": "13830900_13"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.7135634011, "return_std": 0.5094791444, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "humanoid_run", "policy_path": "13830900_13/step_000000400001", "id": "13830900_13"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 1.2474933038, "return_std": 1.2910988761, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "humanoid_run", "policy_path": "13830900_13/step_000000800001", "id": "13830900_13"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.8356665154, "return_std": 0.7188334909, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "humanoid_run", "policy_path": "13830900_13/step_000001600001", "id": "13830900_13"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.9762393816, "return_std": 0.7755211533, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "humanoid_run", "policy_path": "13830900_13/step_000002500001", "id": "13830900_13"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.6940034686000001, "return_std": 0.43424401230000004, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "humanoid_run", "policy_path": "13830900_14/step_000000025001", "id": "13830900_14"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.7217953633, "return_std": 0.6182066347, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "humanoid_run", "policy_path": "13830900_14/step_000000050001", "id": "13830900_14"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.9215304628000001, "return_std": 0.5706878724000001, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "humanoid_run", "policy_path": "13830900_14/step_000000100001", "id": "13830900_14"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.9458962745, "return_std": 0.8358171084, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "humanoid_run", "policy_path": "13830900_14/step_000000200001", "id": "13830900_14"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.913546528, "return_std": 0.7375359964, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "humanoid_run", "policy_path": "13830900_14/step_000000400001", "id": "13830900_14"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.7409792939000001, "return_std": 0.5261578707, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "humanoid_run", "policy_path": "13830900_14/step_000000800001", "id": "13830900_14"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.6102272334000001, "return_std": 0.4084808522, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "humanoid_run", "policy_path": "13830900_14/step_000001600001", "id": "13830900_14"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 1.1027454191, "return_std": 0.8492285687000001, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "humanoid_run", "policy_path": "13830900_14/step_000002500001", "id": "13830900_14"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.7891275481000001, "return_std": 0.46224267, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "humanoid_run", "policy_path": "13830900_15/step_000000025001", "id": "13830900_15"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.6193283323000001, "return_std": 0.3909580412, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "humanoid_run", "policy_path": "13830900_15/step_000000050001", "id": "13830900_15"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.8582481306, "return_std": 0.480054815, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "humanoid_run", "policy_path": "13830900_15/step_000000100001", "id": "13830900_15"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.7700687178000001, "return_std": 0.4651092905, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "humanoid_run", "policy_path": "13830900_15/step_000000200001", "id": "13830900_15"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 1.1766183466, "return_std": 1.2335082728, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "humanoid_run", "policy_path": "13830900_15/step_000000400001", "id": "13830900_15"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.7358601007000001, "return_std": 0.5040103893, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "humanoid_run", "policy_path": "13830900_15/step_000000800001", "id": "13830900_15"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 0.7677652739, "return_std": 0.5093057089, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "humanoid_run", "policy_path": "13830900_15/step_000001600001", "id": "13830900_15"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 1.4108366562, "return_std": 1.2547174629, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "humanoid_run", "policy_path": "13830900_15/step_000002500001", "id": "13830900_15"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 42.6655285767, "return_std": 200.5261487536, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_16/step_000000025001", "id": "13830900_16"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 41.5456724919, "return_std": 197.6292797757, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_16/step_000000050001", "id": "13830900_16"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 38.6544847296, "return_std": 191.0831358403, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_16/step_000000100001", "id": "13830900_16"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 43.5246632838, "return_std": 199.1451206266, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_16/step_000000200001", "id": "13830900_16"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 67.8070392727, "return_std": 242.0203703522, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_16/step_000000400001", "id": "13830900_16"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 121.7382379025, "return_std": 280.3661632285, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_16/step_000000800001", "id": "13830900_16"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 107.9055001497, "return_std": 275.1853006121, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_16/step_000001600001", "id": "13830900_16"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 68.2257519932, "return_std": 235.8692579029, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_16/step_000002500001", "id": "13830900_16"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 35.9207872984, "return_std": 183.9264740324, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_17/step_000000025001", "id": "13830900_17"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 50.1965499049, "return_std": 215.8961695306, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_17/step_000000050001", "id": "13830900_17"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 33.7357207652, "return_std": 179.0212937023, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_17/step_000000100001", "id": "13830900_17"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 61.2782271458, "return_std": 226.5750138339, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_17/step_000000200001", "id": "13830900_17"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 80.2994050362, "return_std": 245.2929092601, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_17/step_000000400001", "id": "13830900_17"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 113.5171671576, "return_std": 275.3098272289, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_17/step_000000800001", "id": "13830900_17"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 113.00051858, "return_std": 282.9226561951, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_17/step_000001600001", "id": "13830900_17"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 75.4103859845, "return_std": 245.3697344876, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_17/step_000002500001", "id": "13830900_17"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 41.8903563071, "return_std": 199.1330471921, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_18/step_000000025001", "id": "13830900_18"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 36.4172673985, "return_std": 185.8177377762, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_18/step_000000050001", "id": "13830900_18"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 48.4729039206, "return_std": 213.3398627886, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_18/step_000000100001", "id": "13830900_18"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 54.1096438273, "return_std": 223.3817888367, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_18/step_000000200001", "id": "13830900_18"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 79.4810023824, "return_std": 253.9600287901, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_18/step_000000400001", "id": "13830900_18"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 117.8281155718, "return_std": 283.2045906905, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_18/step_000000800001", "id": "13830900_18"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 135.2372154878, "return_std": 299.7181768159, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_18/step_000001600001", "id": "13830900_18"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 84.377181184, "return_std": 256.78435688, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "13830900_18/step_000002500001", "id": "13830900_18"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 42.9282559306, "return_std": 202.0825160646, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_19/step_000000025001", "id": "13830900_19"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 60.5829757412, "return_std": 237.5678361095, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_19/step_000000050001", "id": "13830900_19"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 47.985238481, "return_std": 212.0896236593, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_19/step_000000100001", "id": "13830900_19"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 50.4258200867, "return_std": 215.2562155742, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_19/step_000000200001", "id": "13830900_19"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 61.2592204999, "return_std": 236.1813505848, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_19/step_000000400001", "id": "13830900_19"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 51.2756700229, "return_std": 214.1013661558, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_19/step_000000800001", "id": "13830900_19"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 47.5091717276, "return_std": 208.1218083004, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_19/step_000001600001", "id": "13830900_19"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 42.6649460554, "return_std": 194.0675470494, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_19/step_000002500001", "id": "13830900_19"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 55.4448023958, "return_std": 227.2783315332, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_20/step_000000025001", "id": "13830900_20"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 58.3193763581, "return_std": 231.453494344, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_20/step_000000050001", "id": "13830900_20"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 52.0016228018, "return_std": 217.7925242894, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_20/step_000000100001", "id": "13830900_20"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 50.108066912, "return_std": 213.2712438525, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_20/step_000000200001", "id": "13830900_20"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 55.8310812123, "return_std": 225.82571037, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_20/step_000000400001", "id": "13830900_20"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 50.0798305787, "return_std": 210.1801016198, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_20/step_000000800001", "id": "13830900_20"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 48.5325787521, "return_std": 210.6817437008, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_20/step_000001600001", "id": "13830900_20"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 48.5407526323, "return_std": 210.4239954432, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_20/step_000002500001", "id": "13830900_20"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 42.8036550752, "return_std": 200.4820830076, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_21/step_000000025001", "id": "13830900_21"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 51.9046870839, "return_std": 218.7549962418, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_21/step_000000050001", "id": "13830900_21"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 46.7282469069, "return_std": 208.9617859562, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_21/step_000000100001", "id": "13830900_21"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 45.0128532919, "return_std": 204.0505868409, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_21/step_000000200001", "id": "13830900_21"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 43.7366974989, "return_std": 199.493637789, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_21/step_000000400001", "id": "13830900_21"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 59.9447757932, "return_std": 225.9106984484, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_21/step_000000800001", "id": "13830900_21"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 57.9074093276, "return_std": 228.4726011068, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_21/step_000001600001", "id": "13830900_21"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 52.6183471576, "return_std": 216.5379963201, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "13830900_21/step_000002500001", "id": "13830900_21"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 949.8641008439, "return_std": 47.5405009163, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "walker_stand", "policy_path": "13830900_22/step_000000025001", "id": "13830900_22"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 897.793639847, "return_std": 55.3308939613, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "walker_stand", "policy_path": "13830900_22/step_000000050001", "id": "13830900_22"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 887.314650964, "return_std": 72.2532940131, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "walker_stand", "policy_path": "13830900_22/step_000000100001", "id": "13830900_22"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 789.1825179618, "return_std": 44.9807828513, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "walker_stand", "policy_path": "13830900_22/step_000000200001", "id": "13830900_22"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 713.7679152458, "return_std": 64.6223092679, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "walker_stand", "policy_path": "13830900_22/step_000000400001", "id": "13830900_22"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 669.0592097673, "return_std": 228.601267179, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "walker_stand", "policy_path": "13830900_22/step_000000800001", "id": "13830900_22"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 744.4528840041, "return_std": 68.8299371626, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "walker_stand", "policy_path": "13830900_22/step_000001600001", "id": "13830900_22"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 644.1661517063, "return_std": 56.3492659183, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "walker_stand", "policy_path": "13830900_22/step_000002500001", "id": "13830900_22"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 639.3853864117, "return_std": 265.6487447313, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "walker_stand", "policy_path": "13830900_23/step_000000025001", "id": "13830900_23"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 907.5256824102, "return_std": 66.4790313405, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "walker_stand", "policy_path": "13830900_23/step_000000050001", "id": "13830900_23"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 947.2076660256, "return_std": 28.6866859965, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "walker_stand", "policy_path": "13830900_23/step_000000100001", "id": "13830900_23"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 821.465789896, "return_std": 40.1530789026, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "walker_stand", "policy_path": "13830900_23/step_000000200001", "id": "13830900_23"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 720.7649942361, "return_std": 54.481061741, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "walker_stand", "policy_path": "13830900_23/step_000000400001", "id": "13830900_23"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 630.8956219157, "return_std": 111.70500920960001, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "walker_stand", "policy_path": "13830900_23/step_000000800001", "id": "13830900_23"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 467.0168229309, "return_std": 110.0887819727, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "walker_stand", "policy_path": "13830900_23/step_000001600001", "id": "13830900_23"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 697.197171756, "return_std": 51.1289177193, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "walker_stand", "policy_path": "13830900_23/step_000002500001", "id": "13830900_23"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 861.9819116727, "return_std": 77.9449911097, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "walker_stand", "policy_path": "13830900_24/step_000000025001", "id": "13830900_24"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 946.3621362604, "return_std": 45.867720233200004, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "walker_stand", "policy_path": "13830900_24/step_000000050001", "id": "13830900_24"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 938.4502258222, "return_std": 38.4269958061, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "walker_stand", "policy_path": "13830900_24/step_000000100001", "id": "13830900_24"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 850.7367590825, "return_std": 45.4707141756, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "walker_stand", "policy_path": "13830900_24/step_000000200001", "id": "13830900_24"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 711.509202028, "return_std": 56.0444920299, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "walker_stand", "policy_path": "13830900_24/step_000000400001", "id": "13830900_24"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 700.2785608854, "return_std": 43.728063435, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "walker_stand", "policy_path": "13830900_24/step_000000800001", "id": "13830900_24"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 513.9922703472, "return_std": 45.7813423808, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "walker_stand", "policy_path": "13830900_24/step_000001600001", "id": "13830900_24"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 627.9458591173, "return_std": 70.7943011515, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "walker_stand", "policy_path": "13830900_24/step_000002500001", "id": "13830900_24"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 708.3600430026, "return_std": 120.1958553384, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "walker_walk", "policy_path": "13830900_25/step_000000025001", "id": "13830900_25"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 841.8246604552, "return_std": 71.672290193, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "walker_walk", "policy_path": "13830900_25/step_000000050001", "id": "13830900_25"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 955.5190393501, "return_std": 24.4941987804, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "walker_walk", "policy_path": "13830900_25/step_000000100001", "id": "13830900_25"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 929.2688302402, "return_std": 67.8663735149, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "walker_walk", "policy_path": "13830900_25/step_000000200001", "id": "13830900_25"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 768.7653368891, "return_std": 129.978382206, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "walker_walk", "policy_path": "13830900_25/step_000000400001", "id": "13830900_25"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 600.0458119466, "return_std": 70.1376969699, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "walker_walk", "policy_path": "13830900_25/step_000000800001", "id": "13830900_25"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 951.0975216803, "return_std": 30.4651075719, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "walker_walk", "policy_path": "13830900_25/step_000001600001", "id": "13830900_25"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 610.2206473588, "return_std": 69.4404217795, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 0, "task.task_name": "walker_walk", "policy_path": "13830900_25/step_000002500001", "id": "13830900_25"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 858.7941745617, "return_std": 80.6717444055, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "walker_walk", "policy_path": "13830900_26/step_000000025001", "id": "13830900_26"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 918.7735560357, "return_std": 82.808140046, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "walker_walk", "policy_path": "13830900_26/step_000000050001", "id": "13830900_26"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 945.9252939466, "return_std": 22.3345263731, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "walker_walk", "policy_path": "13830900_26/step_000000100001", "id": "13830900_26"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 899.5339572613, "return_std": 60.0082309698, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "walker_walk", "policy_path": "13830900_26/step_000000200001", "id": "13830900_26"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 780.1832638567, "return_std": 80.8674018988, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "walker_walk", "policy_path": "13830900_26/step_000000400001", "id": "13830900_26"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 27.8691181271, "return_std": 18.4327418935, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "walker_walk", "policy_path": "13830900_26/step_000000800001", "id": "13830900_26"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 417.8034680239, "return_std": 109.5172779818, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "walker_walk", "policy_path": "13830900_26/step_000001600001", "id": "13830900_26"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 524.5199180198, "return_std": 58.1508424506, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 1, "task.task_name": "walker_walk", "policy_path": "13830900_26/step_000002500001", "id": "13830900_26"}, {"snapshot_name": "step_000000025001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 686.0229178922, "return_std": 143.2036283792, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "walker_walk", "policy_path": "13830900_27/step_000000025001", "id": "13830900_27"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 872.0895573258, "return_std": 144.1378455074, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "walker_walk", "policy_path": "13830900_27/step_000000050001", "id": "13830900_27"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 919.1397626198, "return_std": 58.1087004836, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "walker_walk", "policy_path": "13830900_27/step_000000100001", "id": "13830900_27"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 950.8251137659, "return_std": 22.6475224269, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "walker_walk", "policy_path": "13830900_27/step_000000200001", "id": "13830900_27"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 695.9604736541, "return_std": 62.5568375579, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "walker_walk", "policy_path": "13830900_27/step_000000400001", "id": "13830900_27"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 640.9753632413, "return_std": 56.4247678431, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "walker_walk", "policy_path": "13830900_27/step_000000800001", "id": "13830900_27"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 529.3195165949, "return_std": 191.2771283482, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "walker_walk", "policy_path": "13830900_27/step_000001600001", "id": "13830900_27"}, {"snapshot_name": "step_000002500001", "cwp": false, "is_recurrent": false, "agent_name": "D4PG", "return_mean": 541.2163946617, "return_std": 63.7220523139, "agent.batch_size": 256, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": NaN, "networks.critic_lstm_sizes": null, "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": null, "seed": 2, "task.task_name": "walker_walk", "policy_path": "13830900_27/step_000002500001", "id": "13830900_27"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 562.184375, "return_std": 419.2444522048, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "13903268_1/step_000000050001", "id": "13903268_1"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 543.9802083333, "return_std": 417.0027476872, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "13903268_1/step_000000100001", "id": "13903268_1"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 528.90625, "return_std": 417.9250941492, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "13903268_1/step_000000200001", "id": "13903268_1"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 493.5197916667, "return_std": 423.4303007919, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "13903268_1/step_000000400001", "id": "13903268_1"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 443.6177083333, "return_std": 426.4536138605, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "13903268_1/step_000000800001", "id": "13903268_1"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 435.3705263158, "return_std": 423.0088889135, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "13903268_1/step_000001600001", "id": "13903268_1"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 385.9958333333, "return_std": 410.2613246683, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "13903268_1/step_000003200001", "id": "13903268_1"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 399.6673684211, "return_std": 420.8395854468, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "13903268_1/step_000005000001", "id": "13903268_1"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 562.6427083333, "return_std": 420.0526686885, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "13903268_2/step_000000050001", "id": "13903268_2"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 584.2863157895, "return_std": 404.7308490737, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "13903268_2/step_000000100001", "id": "13903268_2"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 516.7884210526, "return_std": 420.8551914536, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "13903268_2/step_000000200001", "id": "13903268_2"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 472.7520833333, "return_std": 417.8413953827, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "13903268_2/step_000000400001", "id": "13903268_2"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 468.8052083333, "return_std": 416.4306252742, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "13903268_2/step_000000800001", "id": "13903268_2"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 438.3084210526, "return_std": 420.2664349258, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "13903268_2/step_000001600001", "id": "13903268_2"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 407.1354166667, "return_std": 410.5070244183, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "13903268_2/step_000003200001", "id": "13903268_2"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 391.3760416667, "return_std": 412.8795889895, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "13903268_2/step_000005000001", "id": "13903268_2"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 526.9489583333, "return_std": 422.5464076653, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "13903268_3/step_000000050001", "id": "13903268_3"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 594.7968421053, "return_std": 404.1437158834, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "13903268_3/step_000000100001", "id": "13903268_3"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 573.3473684211, "return_std": 407.7073055067, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "13903268_3/step_000000200001", "id": "13903268_3"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 516.0739583333, "return_std": 414.1142635849, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "13903268_3/step_000000400001", "id": "13903268_3"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 464.36, "return_std": 421.8740827629, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "13903268_3/step_000000800001", "id": "13903268_3"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 464.0378947368, "return_std": 423.2583063875, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "13903268_3/step_000001600001", "id": "13903268_3"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 447.5357894737, "return_std": 418.5442434853, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "13903268_3/step_000003200001", "id": "13903268_3"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 427.1852631579, "return_std": 418.7689160519, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "13903268_3/step_000005000001", "id": "13903268_3"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 169.5340215057, "return_std": 280.4514791419, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_4/step_000000050001", "id": "13903268_4"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 312.2698771901, "return_std": 334.3230282283, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_4/step_000000100001", "id": "13903268_4"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 346.6529599772, "return_std": 330.7600928679, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_4/step_000000200001", "id": "13903268_4"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 311.9929294094, "return_std": 327.5034317067, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_4/step_000000400001", "id": "13903268_4"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 318.4108452062, "return_std": 332.8336211147, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_4/step_000000800001", "id": "13903268_4"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 264.7845668581, "return_std": 311.7484973872, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_4/step_000001600001", "id": "13903268_4"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 257.5153580589, "return_std": 312.4020565865, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_4/step_000003200001", "id": "13903268_4"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 236.9073258746, "return_std": 309.4731612602, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_4/step_000005000001", "id": "13903268_4"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 145.4847436136, "return_std": 254.2929551278, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_5/step_000000050001", "id": "13903268_5"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 295.302356201, "return_std": 331.3313120366, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_5/step_000000100001", "id": "13903268_5"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 336.201531529, "return_std": 332.8825670255, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_5/step_000000200001", "id": "13903268_5"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 324.5188274143, "return_std": 337.0689302792, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_5/step_000000400001", "id": "13903268_5"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 309.9531567956, "return_std": 332.9754366774, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_5/step_000000800001", "id": "13903268_5"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 302.3322015548, "return_std": 332.6229369408, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_5/step_000001600001", "id": "13903268_5"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 241.3321585037, "return_std": 312.9490948687, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_5/step_000003200001", "id": "13903268_5"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 228.4900750668, "return_std": 299.5615794213, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_5/step_000005000001", "id": "13903268_5"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 171.412152622, "return_std": 280.79568508, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_6/step_000000050001", "id": "13903268_6"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 302.6923188864, "return_std": 321.0857823539, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_6/step_000000100001", "id": "13903268_6"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 331.9041749943, "return_std": 335.0122265562, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_6/step_000000200001", "id": "13903268_6"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 341.4325308294, "return_std": 345.4307528341, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_6/step_000000400001", "id": "13903268_6"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 324.4019752543, "return_std": 332.7408728887, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_6/step_000000800001", "id": "13903268_6"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 279.3946441686, "return_std": 321.008044861, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_6/step_000001600001", "id": "13903268_6"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 257.8212542551, "return_std": 318.1173885005, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_6/step_000003200001", "id": "13903268_6"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 249.3151896714, "return_std": 316.5322471316, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "13903268_6/step_000005000001", "id": "13903268_6"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 90.8079459051, "return_std": 39.9707812257, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "13903268_7/step_000000050001", "id": "13903268_7"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 136.8885302531, "return_std": 43.4298942084, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "13903268_7/step_000000100001", "id": "13903268_7"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 164.8971649146, "return_std": 40.1933526894, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "13903268_7/step_000000200001", "id": "13903268_7"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 208.9693234643, "return_std": 59.9170731956, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "13903268_7/step_000000400001", "id": "13903268_7"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 249.4890270902, "return_std": 69.0901737878, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "13903268_7/step_000000800001", "id": "13903268_7"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 270.3772800461, "return_std": 71.118075562, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "13903268_7/step_000001600001", "id": "13903268_7"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 273.6560716395, "return_std": 72.2252799055, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "13903268_7/step_000003200001", "id": "13903268_7"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 282.3941287548, "return_std": 78.1173335317, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "13903268_7/step_000005000001", "id": "13903268_7"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 110.2934368272, "return_std": 33.488598549, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "13903268_8/step_000000050001", "id": "13903268_8"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 128.1858056196, "return_std": 37.4137897154, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "13903268_8/step_000000100001", "id": "13903268_8"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 168.2249895201, "return_std": 50.0774587803, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "13903268_8/step_000000200001", "id": "13903268_8"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 210.173306376, "return_std": 53.8925422302, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "13903268_8/step_000000400001", "id": "13903268_8"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 266.6142920707, "return_std": 69.8458151878, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "13903268_8/step_000000800001", "id": "13903268_8"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 271.4007852502, "return_std": 71.725142132, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "13903268_8/step_000001600001", "id": "13903268_8"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 287.3425118197, "return_std": 81.2270044788, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "13903268_8/step_000003200001", "id": "13903268_8"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 296.929034745, "return_std": 78.2868759132, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "13903268_8/step_000005000001", "id": "13903268_8"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 86.1784549416, "return_std": 47.9129202265, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "13903268_9/step_000000050001", "id": "13903268_9"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 136.9379183141, "return_std": 36.6829673393, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "13903268_9/step_000000100001", "id": "13903268_9"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 164.5356248325, "return_std": 47.4550102813, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "13903268_9/step_000000200001", "id": "13903268_9"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 186.9632393137, "return_std": 50.2476486813, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "13903268_9/step_000000400001", "id": "13903268_9"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 238.1917502636, "return_std": 60.5241940528, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "13903268_9/step_000000800001", "id": "13903268_9"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 273.6702356548, "return_std": 72.7566113691, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "13903268_9/step_000001600001", "id": "13903268_9"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 274.8698831042, "return_std": 77.4009219008, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "13903268_9/step_000003200001", "id": "13903268_9"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 295.5929511555, "return_std": 79.4780491068, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "13903268_9/step_000005000001", "id": "13903268_9"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 649.1967490101, "return_std": 270.1694456796, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "13903268_10/step_000000050001", "id": "13903268_10"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 690.3868979483, "return_std": 235.1364308148, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "13903268_10/step_000000100001", "id": "13903268_10"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 592.15207999, "return_std": 254.2417875344, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "13903268_10/step_000000200001", "id": "13903268_10"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 746.8280091427, "return_std": 224.3359119389, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "13903268_10/step_000000400001", "id": "13903268_10"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 789.3312962431, "return_std": 181.0520890297, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "13903268_10/step_000000800001", "id": "13903268_10"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 696.6903012756, "return_std": 218.8008423606, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "13903268_10/step_000001600001", "id": "13903268_10"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 524.9389816438, "return_std": 237.9101534913, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "13903268_10/step_000003200001", "id": "13903268_10"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 451.9429333213, "return_std": 232.6879490816, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "13903268_10/step_000005000001", "id": "13903268_10"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 545.0324094918, "return_std": 244.3434315601, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "13903268_11/step_000000050001", "id": "13903268_11"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 573.3036653009, "return_std": 284.421899136, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "13903268_11/step_000000100001", "id": "13903268_11"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 627.0656204962, "return_std": 242.8966559474, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "13903268_11/step_000000200001", "id": "13903268_11"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 369.529217166, "return_std": 202.3199652819, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "13903268_11/step_000000400001", "id": "13903268_11"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 830.788332281, "return_std": 114.4299148072, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "13903268_11/step_000000800001", "id": "13903268_11"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 784.1969402609, "return_std": 168.4463795826, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "13903268_11/step_000001600001", "id": "13903268_11"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 763.6537206531, "return_std": 182.5248059107, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "13903268_11/step_000003200001", "id": "13903268_11"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 603.505235879, "return_std": 276.3136532522, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "13903268_11/step_000005000001", "id": "13903268_11"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 746.0236370306, "return_std": 199.9239215358, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "13903268_12/step_000000050001", "id": "13903268_12"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 482.0367345572, "return_std": 241.0014083671, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "13903268_12/step_000000100001", "id": "13903268_12"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 627.7137073731, "return_std": 230.8881261861, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "13903268_12/step_000000200001", "id": "13903268_12"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 733.0484829736, "return_std": 222.0957149724, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "13903268_12/step_000000400001", "id": "13903268_12"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 681.3849068484, "return_std": 205.5557016376, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "13903268_12/step_000000800001", "id": "13903268_12"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 705.0587836512, "return_std": 233.9492627658, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "13903268_12/step_000001600001", "id": "13903268_12"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 714.9199699515, "return_std": 218.1251297364, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "13903268_12/step_000003200001", "id": "13903268_12"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 710.5871569518, "return_std": 224.50997284, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "13903268_12/step_000005000001", "id": "13903268_12"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 244.0489549492, "return_std": 157.5635366002, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "13933762_1/step_000000050001", "id": "13933762_1"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 252.1944035503, "return_std": 171.4010790399, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "13933762_1/step_000000100001", "id": "13933762_1"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 263.8920268549, "return_std": 189.1084277231, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "13933762_1/step_000000200001", "id": "13933762_1"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 304.8485798293, "return_std": 218.9641620863, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "13933762_1/step_000000400001", "id": "13933762_1"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 278.4582188249, "return_std": 193.9616513931, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "13933762_1/step_000000800001", "id": "13933762_1"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 263.6491738508, "return_std": 165.7432354289, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "13933762_1/step_000001600001", "id": "13933762_1"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 265.6654975001, "return_std": 175.1171143844, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "13933762_1/step_000003200001", "id": "13933762_1"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 240.0624320657, "return_std": 154.4773256788, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "13933762_1/step_000005000001", "id": "13933762_1"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 252.6346879276, "return_std": 161.7337885039, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "13933762_2/step_000000050001", "id": "13933762_2"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 278.4447063682, "return_std": 195.8197506835, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "13933762_2/step_000000100001", "id": "13933762_2"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 343.5932662872, "return_std": 222.4905293828, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "13933762_2/step_000000200001", "id": "13933762_2"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 318.4952233762, "return_std": 201.6691356708, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "13933762_2/step_000000400001", "id": "13933762_2"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 282.1466219741, "return_std": 205.5498034602, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "13933762_2/step_000000800001", "id": "13933762_2"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 275.39451035, "return_std": 192.0781707982, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "13933762_2/step_000001600001", "id": "13933762_2"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 252.5623095528, "return_std": 165.07629366, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "13933762_2/step_000003200001", "id": "13933762_2"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 238.4162685041, "return_std": 153.1902873766, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "13933762_2/step_000005000001", "id": "13933762_2"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 241.9220637396, "return_std": 161.0516435782, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "13933762_3/step_000000050001", "id": "13933762_3"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 249.754095632, "return_std": 175.8260154662, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "13933762_3/step_000000100001", "id": "13933762_3"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 301.4683721462, "return_std": 212.092704351, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "13933762_3/step_000000200001", "id": "13933762_3"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 281.5166514443, "return_std": 200.8790238822, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "13933762_3/step_000000400001", "id": "13933762_3"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 275.395052673, "return_std": 184.9201772676, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "13933762_3/step_000000800001", "id": "13933762_3"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 259.4055350048, "return_std": 177.0435460454, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "13933762_3/step_000001600001", "id": "13933762_3"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 220.2310776716, "return_std": 146.3659689031, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "13933762_3/step_000003200001", "id": "13933762_3"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 223.1729208081, "return_std": 146.560371374, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "13933762_3/step_000005000001", "id": "13933762_3"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 512.800022501, "return_std": 272.7164369308, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "13933762_4/step_000000050001", "id": "13933762_4"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 495.9063298437, "return_std": 290.2688753411, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "13933762_4/step_000000100001", "id": "13933762_4"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 501.8042461665, "return_std": 282.7502608192, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "13933762_4/step_000000200001", "id": "13933762_4"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 469.936784782, "return_std": 281.8841148177, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "13933762_4/step_000000400001", "id": "13933762_4"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 496.1063453001, "return_std": 289.4835602253, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "13933762_4/step_000000800001", "id": "13933762_4"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 482.3206415325, "return_std": 276.4213357091, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "13933762_4/step_000001600001", "id": "13933762_4"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 499.5153284394, "return_std": 274.5630131455, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "13933762_4/step_000003200001", "id": "13933762_4"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 507.2861391977, "return_std": 265.9659277005, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "13933762_4/step_000005000001", "id": "13933762_4"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 518.7153855397, "return_std": 273.9375962443, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "13933762_5/step_000000050001", "id": "13933762_5"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 506.0917123851, "return_std": 282.9645915243, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "13933762_5/step_000000100001", "id": "13933762_5"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 490.0561024503, "return_std": 287.1801536013, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "13933762_5/step_000000200001", "id": "13933762_5"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 499.6531866011, "return_std": 288.7626454937, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "13933762_5/step_000000400001", "id": "13933762_5"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 493.1040183403, "return_std": 280.3038872982, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "13933762_5/step_000000800001", "id": "13933762_5"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 479.972873876, "return_std": 275.4320032215, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "13933762_5/step_000001600001", "id": "13933762_5"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 493.5185926141, "return_std": 266.6934573103, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "13933762_5/step_000003200001", "id": "13933762_5"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 467.9414282868, "return_std": 266.6026656344, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "13933762_5/step_000005000001", "id": "13933762_5"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 511.1447779497, "return_std": 271.4465964439, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "13933762_6/step_000000050001", "id": "13933762_6"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 485.1603609489, "return_std": 283.7029014782, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "13933762_6/step_000000100001", "id": "13933762_6"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 509.0323132123, "return_std": 274.6449514441, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "13933762_6/step_000000200001", "id": "13933762_6"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 466.8160485712, "return_std": 278.1864683095, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "13933762_6/step_000000400001", "id": "13933762_6"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 514.5087491392, "return_std": 267.1883370749, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "13933762_6/step_000000800001", "id": "13933762_6"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 524.6166080138, "return_std": 264.2397049964, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "13933762_6/step_000001600001", "id": "13933762_6"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 547.419888825, "return_std": 255.0083301107, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "13933762_6/step_000003200001", "id": "13933762_6"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 518.5636364199, "return_std": 253.0305847966, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "13933762_6/step_000005000001", "id": "13933762_6"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 53.4668377102, "return_std": 219.3138469114, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_7/step_000000050001", "id": "13933762_7"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 100.8010853979, "return_std": 259.4338151442, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_7/step_000000100001", "id": "13933762_7"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 240.6402662424, "return_std": 321.8894677269, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_7/step_000000200001", "id": "13933762_7"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 387.8310669799, "return_std": 345.3090969516, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_7/step_000000400001", "id": "13933762_7"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 396.4265865912, "return_std": 349.4434746305, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_7/step_000000800001", "id": "13933762_7"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 417.0420609475, "return_std": 348.6304199543, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_7/step_000001600001", "id": "13933762_7"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 370.7925344068, "return_std": 357.7728360778, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_7/step_000003200001", "id": "13933762_7"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 352.6268510889, "return_std": 350.0996554224, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_7/step_000005000001", "id": "13933762_7"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 62.9062965696, "return_std": 236.3461663077, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_8/step_000000050001", "id": "13933762_8"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 101.43651321830001, "return_std": 254.3880917909, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_8/step_000000100001", "id": "13933762_8"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 190.1552012388, "return_std": 319.8655114609, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_8/step_000000200001", "id": "13933762_8"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 411.1319668745, "return_std": 350.6381762629, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_8/step_000000400001", "id": "13933762_8"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 408.4759902345, "return_std": 352.981404488, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_8/step_000000800001", "id": "13933762_8"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 384.7473506042, "return_std": 353.691692026, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_8/step_000001600001", "id": "13933762_8"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 359.0152849816, "return_std": 346.6310931221, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_8/step_000003200001", "id": "13933762_8"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 368.0352490679, "return_std": 358.0777489165, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_8/step_000005000001", "id": "13933762_8"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 53.7723362017, "return_std": 219.5949291772, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_9/step_000000050001", "id": "13933762_9"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 118.1984423265, "return_std": 282.8625884442, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_9/step_000000100001", "id": "13933762_9"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 267.0246667543, "return_std": 335.9242781969, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_9/step_000000200001", "id": "13933762_9"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 384.1461070258, "return_std": 354.3323529285, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_9/step_000000400001", "id": "13933762_9"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 408.9113506757, "return_std": 352.0089508352, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_9/step_000000800001", "id": "13933762_9"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 366.9639304097, "return_std": 355.9868723441, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_9/step_000001600001", "id": "13933762_9"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 380.266202427, "return_std": 358.2743406415, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_9/step_000003200001", "id": "13933762_9"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 319.9322795811, "return_std": 347.3898451277, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "13933762_9/step_000005000001", "id": "13933762_9"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 698.1507067182, "return_std": 267.1673423527, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "13933762_10/step_000000050001", "id": "13933762_10"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 654.5445809005, "return_std": 270.4346071583, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "13933762_10/step_000000100001", "id": "13933762_10"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 574.6124573009, "return_std": 269.9028557709, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "13933762_10/step_000000200001", "id": "13933762_10"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 554.0246866214, "return_std": 271.1602443992, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "13933762_10/step_000000400001", "id": "13933762_10"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 518.4698347721, "return_std": 261.7622936891, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "13933762_10/step_000000800001", "id": "13933762_10"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 514.8753669103, "return_std": 264.8792000101, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "13933762_10/step_000001600001", "id": "13933762_10"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 488.1252012264, "return_std": 246.9322614665, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "13933762_10/step_000003200001", "id": "13933762_10"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 475.9134697556, "return_std": 252.7505141365, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "13933762_10/step_000005000001", "id": "13933762_10"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 686.7428261832, "return_std": 266.4773967115, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "13933762_11/step_000000050001", "id": "13933762_11"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 663.0591971774, "return_std": 263.4371059257, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "13933762_11/step_000000100001", "id": "13933762_11"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 584.5242191001, "return_std": 256.9753815305, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "13933762_11/step_000000200001", "id": "13933762_11"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 541.3157444176, "return_std": 273.6431119271, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "13933762_11/step_000000400001", "id": "13933762_11"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 515.1302356457, "return_std": 263.6283862138, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "13933762_11/step_000000800001", "id": "13933762_11"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 499.0081277511, "return_std": 262.476524889, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "13933762_11/step_000001600001", "id": "13933762_11"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 484.3549141093, "return_std": 249.8934123333, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "13933762_11/step_000003200001", "id": "13933762_11"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 480.9692149091, "return_std": 239.449051308, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "13933762_11/step_000005000001", "id": "13933762_11"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 706.0633541238, "return_std": 251.7230404714, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "13933762_12/step_000000050001", "id": "13933762_12"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 640.2557408878, "return_std": 268.0585448828, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "13933762_12/step_000000100001", "id": "13933762_12"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 589.8857397251, "return_std": 271.6195488777, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "13933762_12/step_000000200001", "id": "13933762_12"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 518.9507754081, "return_std": 265.6215740404, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "13933762_12/step_000000400001", "id": "13933762_12"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 532.9203039237, "return_std": 266.9902053116, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "13933762_12/step_000000800001", "id": "13933762_12"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 496.1193727097, "return_std": 264.4797770605, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "13933762_12/step_000001600001", "id": "13933762_12"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 486.3150577111, "return_std": 257.8141948196, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "13933762_12/step_000003200001", "id": "13933762_12"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 483.0639900025, "return_std": 242.294231777, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "13933762_12/step_000005000001", "id": "13933762_12"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 848.9907430883, "return_std": 216.2115172211, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "13933762_13/step_000000050001", "id": "13933762_13"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 826.2558761189, "return_std": 227.6151921887, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "13933762_13/step_000000100001", "id": "13933762_13"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 738.569393973, "return_std": 285.1384216325, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "13933762_13/step_000000200001", "id": "13933762_13"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 686.3753830012, "return_std": 301.976348703, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "13933762_13/step_000000400001", "id": "13933762_13"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 673.0508848699, "return_std": 306.2411261048, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "13933762_13/step_000000800001", "id": "13933762_13"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 637.2871966365, "return_std": 318.0479939287, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "13933762_13/step_000001600001", "id": "13933762_13"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 630.3950974565, "return_std": 311.8406201356, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "13933762_13/step_000003200001", "id": "13933762_13"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 610.7117434296, "return_std": 309.4973761147, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "13933762_13/step_000005000001", "id": "13933762_13"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 825.0297279446, "return_std": 251.40538247, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "13933762_14/step_000000050001", "id": "13933762_14"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 805.9535468575, "return_std": 248.3046664606, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "13933762_14/step_000000100001", "id": "13933762_14"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 736.8531960634, "return_std": 288.6444593013, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "13933762_14/step_000000200001", "id": "13933762_14"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 672.7221271239, "return_std": 316.0981769559, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "13933762_14/step_000000400001", "id": "13933762_14"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 668.972986593, "return_std": 303.4101277258, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "13933762_14/step_000000800001", "id": "13933762_14"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 635.7063110578, "return_std": 313.6538489222, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "13933762_14/step_000001600001", "id": "13933762_14"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 617.2397481745, "return_std": 319.7488817648, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "13933762_14/step_000003200001", "id": "13933762_14"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 586.6173460412, "return_std": 321.4369011213, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "13933762_14/step_000005000001", "id": "13933762_14"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 852.5349127821, "return_std": 226.9851002042, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "13933762_15/step_000000050001", "id": "13933762_15"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 791.0044296156, "return_std": 269.9636839795, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "13933762_15/step_000000100001", "id": "13933762_15"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 734.7435170308, "return_std": 302.8441611332, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "13933762_15/step_000000200001", "id": "13933762_15"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 714.3625928321, "return_std": 297.4770276289, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "13933762_15/step_000000400001", "id": "13933762_15"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 671.713999812, "return_std": 313.362760058, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "13933762_15/step_000000800001", "id": "13933762_15"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 627.6209749513, "return_std": 316.1934168025, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "13933762_15/step_000001600001", "id": "13933762_15"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 624.7130614138, "return_std": 317.8969742888, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "13933762_15/step_000003200001", "id": "13933762_15"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RABM", "return_mean": 604.0734233541, "return_std": 322.6720840394, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": null, "agent.policy_improvement_modes": null, "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "13933762_15/step_000005000001", "id": "13933762_15"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 607.703043022, "return_std": 435.6179216914, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "14010194_1/step_000000050001", "id": "14010194_1"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 622.74, "return_std": 421.0561220724, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "14010194_1/step_000000100001", "id": "14010194_1"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 605.2651991614, "return_std": 429.9365993239, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "14010194_1/step_000000200001", "id": "14010194_1"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 475.7150368034, "return_std": 439.7939921743, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "14010194_1/step_000000400001", "id": "14010194_1"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 393.9517313746, "return_std": 428.3812085661, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "14010194_1/step_000000800001", "id": "14010194_1"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 343.3368200837, "return_std": 422.2695642155, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "14010194_1/step_000001600001", "id": "14010194_1"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 359.3630705394, "return_std": 423.1971215176, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "14010194_1/step_000003200001", "id": "14010194_1"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 355.7284210526, "return_std": 425.0792673028, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "14010194_1/step_000005000001", "id": "14010194_1"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 618.333683106, "return_std": 427.4266459854, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "14010194_2/step_000000050001", "id": "14010194_2"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 582.4291710388, "return_std": 429.4641686234, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "14010194_2/step_000000100001", "id": "14010194_2"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 607.9642481598, "return_std": 419.3361354688, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "14010194_2/step_000000200001", "id": "14010194_2"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 424.2376445846, "return_std": 433.5932999729, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "14010194_2/step_000000400001", "id": "14010194_2"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 362.0564853556, "return_std": 430.0173221274, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "14010194_2/step_000000800001", "id": "14010194_2"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 363.0324267782, "return_std": 426.4146373267, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "14010194_2/step_000001600001", "id": "14010194_2"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 322.8335100742, "return_std": 414.0989359166, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "14010194_2/step_000003200001", "id": "14010194_2"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 334.9779411765, "return_std": 420.9735980801, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "14010194_2/step_000005000001", "id": "14010194_2"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 588.3717277487, "return_std": 436.9681145829, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "14010194_3/step_000000050001", "id": "14010194_3"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 635.9326315789, "return_std": 423.2387788824, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "14010194_3/step_000000100001", "id": "14010194_3"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 528.7701396348, "return_std": 441.6074679051, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "14010194_3/step_000000200001", "id": "14010194_3"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 491.2726315789, "return_std": 441.7208164108, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "14010194_3/step_000000400001", "id": "14010194_3"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 366.6977964323, "return_std": 431.0560256307, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "14010194_3/step_000000800001", "id": "14010194_3"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 361.2293193717, "return_std": 418.1073492939, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "14010194_3/step_000001600001", "id": "14010194_3"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 334.3793103448, "return_std": 421.3691313864, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "14010194_3/step_000003200001", "id": "14010194_3"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 351.22, "return_std": 426.5082244972, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "14010194_3/step_000005000001", "id": "14010194_3"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 125.0979305957, "return_std": 254.9469065047, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_4/step_000000050001", "id": "14010194_4"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 290.3922639664, "return_std": 343.8646165306, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_4/step_000000100001", "id": "14010194_4"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 342.6061328041, "return_std": 350.0782913452, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_4/step_000000200001", "id": "14010194_4"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 396.3709497201, "return_std": 357.7664423993, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_4/step_000000400001", "id": "14010194_4"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 342.6733364409, "return_std": 347.8900314509, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_4/step_000000800001", "id": "14010194_4"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 332.0976597459, "return_std": 343.6621613906, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_4/step_000001600001", "id": "14010194_4"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 335.6959358267, "return_std": 336.5204832184, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_4/step_000003200001", "id": "14010194_4"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 318.7724750565, "return_std": 339.8175054579, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_4/step_000005000001", "id": "14010194_4"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 149.6562124952, "return_std": 279.4900936711, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_5/step_000000050001", "id": "14010194_5"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 266.4745921071, "return_std": 330.59600204, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_5/step_000000100001", "id": "14010194_5"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 337.2433736014, "return_std": 346.5057431487, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_5/step_000000200001", "id": "14010194_5"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 386.7069796449, "return_std": 363.2740031021, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_5/step_000000400001", "id": "14010194_5"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 329.9894030322, "return_std": 339.1451340254, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_5/step_000000800001", "id": "14010194_5"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 326.9205405787, "return_std": 342.0504897709, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_5/step_000001600001", "id": "14010194_5"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 324.5966964984, "return_std": 336.7362130002, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_5/step_000003200001", "id": "14010194_5"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 315.15253611, "return_std": 339.6191441481, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_5/step_000005000001", "id": "14010194_5"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 140.7213952993, "return_std": 269.116211926, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_6/step_000000050001", "id": "14010194_6"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 190.6115777182, "return_std": 294.3286210584, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_6/step_000000100001", "id": "14010194_6"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 359.1409006158, "return_std": 356.2675405727, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_6/step_000000200001", "id": "14010194_6"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 386.1449847099, "return_std": 344.9927106467, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_6/step_000000400001", "id": "14010194_6"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 344.750832951, "return_std": 345.455098198, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_6/step_000000800001", "id": "14010194_6"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 331.3979175648, "return_std": 343.2871498308, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_6/step_000001600001", "id": "14010194_6"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 310.9251827162, "return_std": 331.0797403732, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_6/step_000003200001", "id": "14010194_6"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 306.0176100305, "return_std": 331.5414034287, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "14010194_6/step_000005000001", "id": "14010194_6"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 212.277429261, "return_std": 118.2722055916, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "14010194_7/step_000000050001", "id": "14010194_7"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 345.0890314329, "return_std": 113.2749385002, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "14010194_7/step_000000100001", "id": "14010194_7"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 393.7828795081, "return_std": 125.4762333541, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "14010194_7/step_000000200001", "id": "14010194_7"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 503.6973488926, "return_std": 107.3599851069, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "14010194_7/step_000000400001", "id": "14010194_7"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 550.2408188909, "return_std": 95.4674548345, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "14010194_7/step_000000800001", "id": "14010194_7"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 572.8740374986, "return_std": 85.1431891965, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "14010194_7/step_000001600001", "id": "14010194_7"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 568.3028806841, "return_std": 94.5671573533, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "14010194_7/step_000003200001", "id": "14010194_7"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 571.2192008037, "return_std": 96.7985649849, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "14010194_7/step_000005000001", "id": "14010194_7"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 259.7022360266, "return_std": 141.4894694081, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "14010194_8/step_000000050001", "id": "14010194_8"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 312.1005081818, "return_std": 113.8149268567, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "14010194_8/step_000000100001", "id": "14010194_8"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 456.7392379909, "return_std": 122.0679611154, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "14010194_8/step_000000200001", "id": "14010194_8"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 525.6244095079, "return_std": 115.3756003559, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "14010194_8/step_000000400001", "id": "14010194_8"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 572.8353427007, "return_std": 91.5783222192, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "14010194_8/step_000000800001", "id": "14010194_8"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 549.1610259521, "return_std": 98.4428681515, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "14010194_8/step_000001600001", "id": "14010194_8"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 562.632814259, "return_std": 95.0146943802, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "14010194_8/step_000003200001", "id": "14010194_8"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 566.4769719463, "return_std": 99.069299821, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "14010194_8/step_000005000001", "id": "14010194_8"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 218.405955869, "return_std": 130.9357813411, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "14010194_9/step_000000050001", "id": "14010194_9"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 339.4336339932, "return_std": 128.5138037614, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "14010194_9/step_000000100001", "id": "14010194_9"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 347.8167423517, "return_std": 98.6972159375, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "14010194_9/step_000000200001", "id": "14010194_9"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 497.6440664645, "return_std": 98.6544450542, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "14010194_9/step_000000400001", "id": "14010194_9"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 560.0345639417, "return_std": 114.9715092582, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "14010194_9/step_000000800001", "id": "14010194_9"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 548.1716471601, "return_std": 110.1551650247, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "14010194_9/step_000001600001", "id": "14010194_9"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 574.8875625744, "return_std": 101.3917456381, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "14010194_9/step_000003200001", "id": "14010194_9"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 567.7892488448, "return_std": 96.8607379652, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "14010194_9/step_000005000001", "id": "14010194_9"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 362.5508226606, "return_std": 154.5340090425, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "14010194_10/step_000000050001", "id": "14010194_10"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 435.3710973676, "return_std": 193.2439789041, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "14010194_10/step_000000100001", "id": "14010194_10"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 576.855331693, "return_std": 222.6969412096, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "14010194_10/step_000000200001", "id": "14010194_10"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 508.9569154194, "return_std": 269.981070786, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "14010194_10/step_000000400001", "id": "14010194_10"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 594.8030224675, "return_std": 235.6038836157, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "14010194_10/step_000000800001", "id": "14010194_10"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 572.6511651423, "return_std": 251.4827732268, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "14010194_10/step_000001600001", "id": "14010194_10"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 586.3491526061, "return_std": 259.1940921343, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "14010194_10/step_000003200001", "id": "14010194_10"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 553.6761019309, "return_std": 271.2433120963, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "14010194_10/step_000005000001", "id": "14010194_10"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 477.6897156033, "return_std": 213.7391571812, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "14010194_11/step_000000050001", "id": "14010194_11"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 618.1421199272, "return_std": 238.3584070913, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "14010194_11/step_000000100001", "id": "14010194_11"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 318.6752086512, "return_std": 165.0314903283, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "14010194_11/step_000000200001", "id": "14010194_11"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 691.9287026551, "return_std": 225.9738884438, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "14010194_11/step_000000400001", "id": "14010194_11"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 588.0310738739, "return_std": 243.275542907, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "14010194_11/step_000000800001", "id": "14010194_11"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 571.3906548283, "return_std": 262.336603966, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "14010194_11/step_000001600001", "id": "14010194_11"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 530.1296100542, "return_std": 256.9138978576, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "14010194_11/step_000003200001", "id": "14010194_11"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 532.0063012255, "return_std": 256.4758920498, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "14010194_11/step_000005000001", "id": "14010194_11"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 438.2023733144, "return_std": 170.2192931034, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "14010194_12/step_000000050001", "id": "14010194_12"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 611.4319590741, "return_std": 241.1159284174, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "14010194_12/step_000000100001", "id": "14010194_12"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 693.0785401817, "return_std": 219.6331054157, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "14010194_12/step_000000200001", "id": "14010194_12"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 547.3465367179, "return_std": 221.1770555733, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "14010194_12/step_000000400001", "id": "14010194_12"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 530.440976178, "return_std": 255.7432364558, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "14010194_12/step_000000800001", "id": "14010194_12"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 602.4026232861, "return_std": 249.8489349471, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "14010194_12/step_000001600001", "id": "14010194_12"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 577.9445766725, "return_std": 258.4342107473, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "14010194_12/step_000003200001", "id": "14010194_12"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 573.9723556626, "return_std": 265.4157084779, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "14010194_12/step_000005000001", "id": "14010194_12"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 499.9227518794, "return_std": 253.1832980646, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "14010194_13/step_000000050001", "id": "14010194_13"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 382.6187275075, "return_std": 241.1345625897, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "14010194_13/step_000000100001", "id": "14010194_13"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 364.29067862, "return_std": 223.8375944709, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "14010194_13/step_000000200001", "id": "14010194_13"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 387.2952644777, "return_std": 232.6540279906, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "14010194_13/step_000000400001", "id": "14010194_13"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 346.6646198118, "return_std": 230.6986234521, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "14010194_13/step_000000800001", "id": "14010194_13"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 340.4481083931, "return_std": 217.4923641764, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "14010194_13/step_000001600001", "id": "14010194_13"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 316.4472347742, "return_std": 207.0775501198, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "14010194_13/step_000003200001", "id": "14010194_13"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 332.0549151652, "return_std": 217.3980208909, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "14010194_13/step_000005000001", "id": "14010194_13"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 541.5118427531, "return_std": 315.8456953055, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "14010194_14/step_000000050001", "id": "14010194_14"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 479.0189338544, "return_std": 295.2516891014, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "14010194_14/step_000000100001", "id": "14010194_14"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 422.5808951747, "return_std": 272.3564103541, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "14010194_14/step_000000200001", "id": "14010194_14"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 375.124255175, "return_std": 231.2291762979, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "14010194_14/step_000000400001", "id": "14010194_14"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 320.2663548363, "return_std": 225.5065752689, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "14010194_14/step_000000800001", "id": "14010194_14"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 360.6196780049, "return_std": 224.3694526134, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "14010194_14/step_000001600001", "id": "14010194_14"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 348.56661606, "return_std": 222.613885664, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "14010194_14/step_000003200001", "id": "14010194_14"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 351.0124451707, "return_std": 227.3128027668, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "14010194_14/step_000005000001", "id": "14010194_14"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 597.2767693647, "return_std": 208.6453200739, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "14010194_15/step_000000050001", "id": "14010194_15"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 500.6676794461, "return_std": 292.2169653143, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "14010194_15/step_000000100001", "id": "14010194_15"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 455.8292368053, "return_std": 260.1723610585, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "14010194_15/step_000000200001", "id": "14010194_15"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 368.0709915665, "return_std": 236.4479911612, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "14010194_15/step_000000400001", "id": "14010194_15"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 350.8653062349, "return_std": 217.6748435711, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "14010194_15/step_000000800001", "id": "14010194_15"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 339.3298103465, "return_std": 219.7225076827, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "14010194_15/step_000001600001", "id": "14010194_15"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 363.9552715391, "return_std": 223.5551611953, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "14010194_15/step_000003200001", "id": "14010194_15"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 346.7835980363, "return_std": 215.5658700712, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "14010194_15/step_000005000001", "id": "14010194_15"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 443.8268623683, "return_std": 281.6819922681, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "14010194_16/step_000000050001", "id": "14010194_16"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 503.2020409182, "return_std": 277.1903972123, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "14010194_16/step_000000100001", "id": "14010194_16"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 477.1730014691, "return_std": 286.6391741694, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "14010194_16/step_000000200001", "id": "14010194_16"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 455.880655332, "return_std": 285.8299826105, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "14010194_16/step_000000400001", "id": "14010194_16"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 481.499062269, "return_std": 281.493429514, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "14010194_16/step_000000800001", "id": "14010194_16"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 475.6844047173, "return_std": 282.5174729445, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "14010194_16/step_000001600001", "id": "14010194_16"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 455.2023054364, "return_std": 285.0421080661, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "14010194_16/step_000003200001", "id": "14010194_16"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 451.0549925184, "return_std": 282.1935520109, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "14010194_16/step_000005000001", "id": "14010194_16"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 489.0817648332, "return_std": 291.5277588344, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "14010194_17/step_000000050001", "id": "14010194_17"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 458.0500815353, "return_std": 282.0051979682, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "14010194_17/step_000000100001", "id": "14010194_17"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 499.5078521649, "return_std": 286.702492452, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "14010194_17/step_000000200001", "id": "14010194_17"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 499.2349563642, "return_std": 276.996677922, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "14010194_17/step_000000400001", "id": "14010194_17"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 486.4043530356, "return_std": 274.7939218785, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "14010194_17/step_000000800001", "id": "14010194_17"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 486.3978544257, "return_std": 271.4055519872, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "14010194_17/step_000001600001", "id": "14010194_17"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 476.5291802528, "return_std": 269.8345118244, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "14010194_17/step_000003200001", "id": "14010194_17"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 466.5498236406, "return_std": 268.520507238, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "14010194_17/step_000005000001", "id": "14010194_17"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 464.2659235811, "return_std": 290.0470717126, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "14010194_18/step_000000050001", "id": "14010194_18"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 485.2142753877, "return_std": 291.0854719443, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "14010194_18/step_000000100001", "id": "14010194_18"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 500.2334330296, "return_std": 285.5087525001, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "14010194_18/step_000000200001", "id": "14010194_18"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 481.8245133699, "return_std": 286.296699254, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "14010194_18/step_000000400001", "id": "14010194_18"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 479.9950086776, "return_std": 278.9030838068, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "14010194_18/step_000000800001", "id": "14010194_18"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 476.3759014257, "return_std": 273.2161329481, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "14010194_18/step_000001600001", "id": "14010194_18"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 486.3086547884, "return_std": 275.6951233282, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "14010194_18/step_000003200001", "id": "14010194_18"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 484.309072248, "return_std": 276.8348007291, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "14010194_18/step_000005000001", "id": "14010194_18"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 100.9708365712, "return_std": 274.5819807908, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_19/step_000000050001", "id": "14010194_19"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 348.4306488596, "return_std": 370.6532415026, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_19/step_000000100001", "id": "14010194_19"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 532.3413777112, "return_std": 350.5493196123, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_19/step_000000200001", "id": "14010194_19"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 574.8218148727, "return_std": 323.6281744462, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_19/step_000000400001", "id": "14010194_19"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 509.304286139, "return_std": 348.0041564579, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_19/step_000000800001", "id": "14010194_19"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 492.8350073014, "return_std": 348.4381427793, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_19/step_000001600001", "id": "14010194_19"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 463.6768398275, "return_std": 357.3511430402, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_19/step_000003200001", "id": "14010194_19"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 452.0329920143, "return_std": 355.0329559748, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_19/step_000005000001", "id": "14010194_19"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 134.5992807105, "return_std": 297.6891284068, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_20/step_000000050001", "id": "14010194_20"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 393.7688765164, "return_std": 377.1224941537, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_20/step_000000100001", "id": "14010194_20"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 514.6359940673, "return_std": 358.1464037552, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_20/step_000000200001", "id": "14010194_20"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 584.8457253394, "return_std": 319.2887302511, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_20/step_000000400001", "id": "14010194_20"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 554.669584583, "return_std": 333.3922309166, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_20/step_000000800001", "id": "14010194_20"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 512.1424678882, "return_std": 344.5913338174, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_20/step_000001600001", "id": "14010194_20"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 482.1321367315, "return_std": 347.4757195405, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_20/step_000003200001", "id": "14010194_20"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 464.2261889159, "return_std": 346.755761268, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_20/step_000005000001", "id": "14010194_20"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 132.4372150941, "return_std": 290.2278668565, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_21/step_000000050001", "id": "14010194_21"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 386.0242663855, "return_std": 369.0542732366, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_21/step_000000100001", "id": "14010194_21"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 529.5378446649, "return_std": 346.7625777426, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_21/step_000000200001", "id": "14010194_21"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 596.6883217264, "return_std": 316.1075210188, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_21/step_000000400001", "id": "14010194_21"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 548.7747003428, "return_std": 339.4262988338, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_21/step_000000800001", "id": "14010194_21"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 479.0167358278, "return_std": 353.90646149, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_21/step_000001600001", "id": "14010194_21"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 448.4697547275, "return_std": 356.8459197284, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_21/step_000003200001", "id": "14010194_21"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 453.7612515609, "return_std": 356.5736705884, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "14010194_21/step_000005000001", "id": "14010194_21"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 682.1760178939, "return_std": 266.5217665033, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "14010194_22/step_000000050001", "id": "14010194_22"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 596.4181090183, "return_std": 272.066187233, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "14010194_22/step_000000100001", "id": "14010194_22"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 534.3943587816, "return_std": 264.029470983, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "14010194_22/step_000000200001", "id": "14010194_22"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 496.7535579856, "return_std": 261.7376244229, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "14010194_22/step_000000400001", "id": "14010194_22"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 460.5104497223, "return_std": 247.1141777881, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "14010194_22/step_000000800001", "id": "14010194_22"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 450.6652580301, "return_std": 254.6031330824, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "14010194_22/step_000001600001", "id": "14010194_22"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 438.4974519951, "return_std": 245.0480734365, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "14010194_22/step_000003200001", "id": "14010194_22"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 424.4856836902, "return_std": 229.6309368227, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "14010194_22/step_000005000001", "id": "14010194_22"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 659.8091699542, "return_std": 272.7967268731, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "14010194_23/step_000000050001", "id": "14010194_23"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 564.3612437053, "return_std": 273.0868288782, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "14010194_23/step_000000100001", "id": "14010194_23"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 524.2781847475, "return_std": 283.6656175402, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "14010194_23/step_000000200001", "id": "14010194_23"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 462.261907133, "return_std": 274.4051271409, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "14010194_23/step_000000400001", "id": "14010194_23"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 434.2212376246, "return_std": 262.9236170968, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "14010194_23/step_000000800001", "id": "14010194_23"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 419.6089387196, "return_std": 269.3362063503, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "14010194_23/step_000001600001", "id": "14010194_23"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 386.032984162, "return_std": 234.4100041276, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "14010194_23/step_000003200001", "id": "14010194_23"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 403.4260236996, "return_std": 248.4985586136, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "14010194_23/step_000005000001", "id": "14010194_23"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 672.8541534328, "return_std": 256.2199238347, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "14010194_24/step_000000050001", "id": "14010194_24"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 556.8453783999, "return_std": 263.3737591426, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "14010194_24/step_000000100001", "id": "14010194_24"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 468.8644332086, "return_std": 264.0104203733, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "14010194_24/step_000000200001", "id": "14010194_24"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 439.264094109, "return_std": 255.6050497095, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "14010194_24/step_000000400001", "id": "14010194_24"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 421.8103268055, "return_std": 256.0414360591, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "14010194_24/step_000000800001", "id": "14010194_24"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 401.2702907189, "return_std": 254.2755663889, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "14010194_24/step_000001600001", "id": "14010194_24"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 380.8690457625, "return_std": 251.8394348945, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "14010194_24/step_000003200001", "id": "14010194_24"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 390.3605354583, "return_std": 251.1477061761, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "14010194_24/step_000005000001", "id": "14010194_24"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 803.2983671743, "return_std": 260.3586107444, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "14010194_25/step_000000050001", "id": "14010194_25"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 684.0582793339, "return_std": 296.4554692422, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "14010194_25/step_000000100001", "id": "14010194_25"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 629.6482468763, "return_std": 327.6548829843, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "14010194_25/step_000000200001", "id": "14010194_25"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 581.0067645313, "return_std": 330.8372951768, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "14010194_25/step_000000400001", "id": "14010194_25"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 506.8683427847, "return_std": 344.6586823007, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "14010194_25/step_000000800001", "id": "14010194_25"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 529.1530257624, "return_std": 340.5162499182, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "14010194_25/step_000001600001", "id": "14010194_25"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 529.6260219084, "return_std": 346.3489610927, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "14010194_25/step_000003200001", "id": "14010194_25"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 506.1716162668, "return_std": 342.479750771, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "14010194_25/step_000005000001", "id": "14010194_25"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 789.3377557689, "return_std": 264.4740875286, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "14010194_26/step_000000050001", "id": "14010194_26"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 707.5446052502, "return_std": 296.8628450829, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "14010194_26/step_000000100001", "id": "14010194_26"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 620.6708726517, "return_std": 320.3940293612, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "14010194_26/step_000000200001", "id": "14010194_26"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 564.299318529, "return_std": 339.8638951998, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "14010194_26/step_000000400001", "id": "14010194_26"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 524.766862349, "return_std": 340.3460376455, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "14010194_26/step_000000800001", "id": "14010194_26"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 514.0439593438, "return_std": 340.0733811931, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "14010194_26/step_000001600001", "id": "14010194_26"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 492.2218561666, "return_std": 347.2482457796, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "14010194_26/step_000003200001", "id": "14010194_26"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 466.823849808, "return_std": 336.1986244325, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "14010194_26/step_000005000001", "id": "14010194_26"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 804.8458422162, "return_std": 245.9135775706, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "14010194_27/step_000000050001", "id": "14010194_27"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 731.9168711267, "return_std": 286.4451447924, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "14010194_27/step_000000100001", "id": "14010194_27"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 615.7352093725, "return_std": 324.6274810636, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "14010194_27/step_000000200001", "id": "14010194_27"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 592.9581532902, "return_std": 333.3221950181, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "14010194_27/step_000000400001", "id": "14010194_27"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 543.771799459, "return_std": 335.4843232485, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "14010194_27/step_000000800001", "id": "14010194_27"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 559.2768688685, "return_std": 332.2262940369, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "14010194_27/step_000001600001", "id": "14010194_27"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 541.0489168256, "return_std": 323.1246131247, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "14010194_27/step_000003200001", "id": "14010194_27"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "RCRR exp", "return_mean": 546.6127660983, "return_std": 322.470792281, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "exp", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "14010194_27/step_000005000001", "id": "14010194_27"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 173.771875, "return_std": 353.4210152428, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "14022681_1/step_000000050001", "id": "14022681_1"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 205.2443298969, "return_std": 376.5805564199, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "14022681_1/step_000000100001", "id": "14022681_1"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 198.8773195876, "return_std": 364.2333940294, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "14022681_1/step_000000200001", "id": "14022681_1"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 239.20625, "return_std": 389.5018732194, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "14022681_1/step_000000400001", "id": "14022681_1"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 236.2571428571, "return_std": 391.7404463721, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "14022681_1/step_000000800001", "id": "14022681_1"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 241.7917525773, "return_std": 382.8195649825, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "14022681_1/step_000001600001", "id": "14022681_1"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 256.0979381443, "return_std": 394.7673841794, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "14022681_1/step_000003200001", "id": "14022681_1"}, {"snapshot_name": "step_000004900001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 266.3490759754, "return_std": 399.0487704356, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "finger_turn_hard", "policy_path": "14022681_1/step_000004900001", "id": "14022681_1"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 207.2154006243, "return_std": 374.9377204007, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "14022681_2/step_000000050001", "id": "14022681_2"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 200.915625, "return_std": 369.2605215981, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "14022681_2/step_000000100001", "id": "14022681_2"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 196.2084623323, "return_std": 370.8074809163, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "14022681_2/step_000000200001", "id": "14022681_2"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 225.2336734694, "return_std": 384.2584909059, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "14022681_2/step_000000400001", "id": "14022681_2"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 240.143877551, "return_std": 388.2311852893, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "14022681_2/step_000000800001", "id": "14022681_2"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 250.6384928717, "return_std": 389.8474526731, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "14022681_2/step_000001600001", "id": "14022681_2"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 233.624742268, "return_std": 388.8002230224, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "14022681_2/step_000003200001", "id": "14022681_2"}, {"snapshot_name": "step_000004850001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 248.7649484536, "return_std": 390.4194357593, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "finger_turn_hard", "policy_path": "14022681_2/step_000004850001", "id": "14022681_2"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 187.5742268041, "return_std": 359.7369138961, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "14022681_3/step_000000050001", "id": "14022681_3"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 186.1773195876, "return_std": 369.2919780019, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "14022681_3/step_000000100001", "id": "14022681_3"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 211.1649484536, "return_std": 378.8474177724, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "14022681_3/step_000000200001", "id": "14022681_3"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 255.5190133607, "return_std": 403.0118003386, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "14022681_3/step_000000400001", "id": "14022681_3"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 238.4175257732, "return_std": 389.4945361709, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "14022681_3/step_000000800001", "id": "14022681_3"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 233.5412371134, "return_std": 385.1628994102, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "14022681_3/step_000001600001", "id": "14022681_3"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 241.2360248447, "return_std": 391.541942097, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "14022681_3/step_000003200001", "id": "14022681_3"}, {"snapshot_name": "step_000004900001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 235.8298969072, "return_std": 384.1876542754, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "finger_turn_hard", "policy_path": "14022681_3/step_000004900001", "id": "14022681_3"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 226.6780130271, "return_std": 309.5557334376, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_4/step_000000050001", "id": "14022681_4"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 265.6063317225, "return_std": 337.9948169616, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_4/step_000000100001", "id": "14022681_4"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 330.8086763575, "return_std": 349.4768908631, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_4/step_000000200001", "id": "14022681_4"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 319.8570600683, "return_std": 340.8286547031, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_4/step_000000400001", "id": "14022681_4"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 294.5443063921, "return_std": 330.8664988918, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_4/step_000000800001", "id": "14022681_4"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 298.6704181381, "return_std": 332.8160230021, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_4/step_000001600001", "id": "14022681_4"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 272.2348557912, "return_std": 322.7096638081, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_4/step_000003200001", "id": "14022681_4"}, {"snapshot_name": "step_000004150001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 268.7103354548, "return_std": 316.7119062062, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_4/step_000004150001", "id": "14022681_4"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 229.7897485961, "return_std": 319.9642914125, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_5/step_000000050001", "id": "14022681_5"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 320.0803865328, "return_std": 347.0368356692, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_5/step_000000100001", "id": "14022681_5"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 299.2264229158, "return_std": 334.2390771661, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_5/step_000000200001", "id": "14022681_5"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 281.888390365, "return_std": 325.9954122958, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_5/step_000000400001", "id": "14022681_5"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 315.7262791787, "return_std": 339.9377802813, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_5/step_000000800001", "id": "14022681_5"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 293.3701341571, "return_std": 326.7586888326, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_5/step_000001600001", "id": "14022681_5"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 255.3954980891, "return_std": 320.3992210675, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_5/step_000003200001", "id": "14022681_5"}, {"snapshot_name": "step_000004600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 284.2314933979, "return_std": 325.9490024946, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_5/step_000004600001", "id": "14022681_5"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 247.0559263563, "return_std": 333.5779033628, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_6/step_000000050001", "id": "14022681_6"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 269.705346613, "return_std": 332.7043302389, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_6/step_000000100001", "id": "14022681_6"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 341.1995092614, "return_std": 350.5231815269, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_6/step_000000200001", "id": "14022681_6"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 296.2149431366, "return_std": 334.348930627, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_6/step_000000400001", "id": "14022681_6"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 298.7283922963, "return_std": 334.7649200002, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_6/step_000000800001", "id": "14022681_6"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 273.8670703092, "return_std": 324.3669493535, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_6/step_000001600001", "id": "14022681_6"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 273.1634923709, "return_std": 317.346132981, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_6/step_000003200001", "id": "14022681_6"}, {"snapshot_name": "step_000004150001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 259.3607395411, "return_std": 319.1755606449, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_peg", "policy_path": "14022681_6/step_000004150001", "id": "14022681_6"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 19.6854066245, "return_std": 20.7058047767, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "14022681_7/step_000000050001", "id": "14022681_7"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 12.8665297069, "return_std": 12.2546511966, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "14022681_7/step_000000100001", "id": "14022681_7"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 11.5076742964, "return_std": 11.3049159481, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "14022681_7/step_000000200001", "id": "14022681_7"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 309.4121851328, "return_std": 146.032045313, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "14022681_7/step_000000400001", "id": "14022681_7"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 347.9743596438, "return_std": 157.2922251036, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "14022681_7/step_000000800001", "id": "14022681_7"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 372.2045295785, "return_std": 153.1702635084, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "14022681_7/step_000001600001", "id": "14022681_7"}, {"snapshot_name": "step_000002800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 371.3528727953, "return_std": 151.5257383285, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "humanoid_run", "policy_path": "14022681_7/step_000002800001", "id": "14022681_7"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 157.8365097374, "return_std": 122.5677165056, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "14022681_8/step_000000050001", "id": "14022681_8"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 212.9367714663, "return_std": 144.1523667355, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "14022681_8/step_000000100001", "id": "14022681_8"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 236.789177944, "return_std": 128.4510125407, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "14022681_8/step_000000200001", "id": "14022681_8"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 334.954402978, "return_std": 165.6440764412, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "14022681_8/step_000000400001", "id": "14022681_8"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 351.3007095514, "return_std": 163.1669178571, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "14022681_8/step_000000800001", "id": "14022681_8"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 374.5055515548, "return_std": 168.768696639, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "14022681_8/step_000001600001", "id": "14022681_8"}, {"snapshot_name": "step_000002850001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 372.8007160751, "return_std": 163.0539996503, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "humanoid_run", "policy_path": "14022681_8/step_000002850001", "id": "14022681_8"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 41.2290660709, "return_std": 48.3822009151, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "14022681_9/step_000000050001", "id": "14022681_9"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 40.4084920082, "return_std": 49.3694309174, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "14022681_9/step_000000100001", "id": "14022681_9"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 251.2255539696, "return_std": 146.3583697358, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "14022681_9/step_000000200001", "id": "14022681_9"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 301.7535594846, "return_std": 172.6568128188, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "14022681_9/step_000000400001", "id": "14022681_9"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 353.7195038149, "return_std": 167.7708055987, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "14022681_9/step_000000800001", "id": "14022681_9"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 350.1741528579, "return_std": 159.1331500982, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "14022681_9/step_000001600001", "id": "14022681_9"}, {"snapshot_name": "step_000002800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 368.1738827912, "return_std": 158.8578824238, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "humanoid_run", "policy_path": "14022681_9/step_000002800001", "id": "14022681_9"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 318.3443101129, "return_std": 184.684375129, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "14022681_10/step_000000050001", "id": "14022681_10"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 207.2276723611, "return_std": 93.2031673751, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "14022681_10/step_000000100001", "id": "14022681_10"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 314.770949377, "return_std": 190.6327388126, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "14022681_10/step_000000200001", "id": "14022681_10"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 338.2292373775, "return_std": 256.3205800101, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "14022681_10/step_000000400001", "id": "14022681_10"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 377.7241028003, "return_std": 255.3606860186, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "14022681_10/step_000000800001", "id": "14022681_10"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 397.7256666684, "return_std": 261.9244944317, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "14022681_10/step_000001600001", "id": "14022681_10"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 358.4612295908, "return_std": 259.3094820866, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "14022681_10/step_000003200001", "id": "14022681_10"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 358.3689411105, "return_std": 270.4635066558, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cartpole_swingup", "policy_path": "14022681_10/step_000005000001", "id": "14022681_10"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 375.1477852033, "return_std": 217.1936877987, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "14022681_11/step_000000100001", "id": "14022681_11"}, {"snapshot_name": "step_000000150001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 359.5606536904, "return_std": 238.5494333255, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "14022681_11/step_000000150001", "id": "14022681_11"}, {"snapshot_name": "step_000000250001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 337.8862587014, "return_std": 225.4167400797, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "14022681_11/step_000000250001", "id": "14022681_11"}, {"snapshot_name": "step_000000450001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 356.976324083, "return_std": 251.7937838413, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "14022681_11/step_000000450001", "id": "14022681_11"}, {"snapshot_name": "step_000000850001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 343.9742945613, "return_std": 252.685757907, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "14022681_11/step_000000850001", "id": "14022681_11"}, {"snapshot_name": "step_000001650001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 292.188990522, "return_std": 227.4026086434, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "14022681_11/step_000001650001", "id": "14022681_11"}, {"snapshot_name": "step_000003250001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 305.0704773085, "return_std": 233.946092997, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "14022681_11/step_000003250001", "id": "14022681_11"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 334.9196004144, "return_std": 233.2083172281, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cartpole_swingup", "policy_path": "14022681_11/step_000005000001", "id": "14022681_11"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 299.2685057007, "return_std": 164.8743588992, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "14022681_12/step_000000050001", "id": "14022681_12"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 282.9930271647, "return_std": 167.9676535497, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "14022681_12/step_000000100001", "id": "14022681_12"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 328.9589370756, "return_std": 216.8313625443, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "14022681_12/step_000000200001", "id": "14022681_12"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 314.9833428066, "return_std": 236.0334860503, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "14022681_12/step_000000400001", "id": "14022681_12"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 383.8654689182, "return_std": 250.091013779, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "14022681_12/step_000000800001", "id": "14022681_12"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 379.975944816, "return_std": 274.8547934442, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "14022681_12/step_000001600001", "id": "14022681_12"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 384.1672712332, "return_std": 266.3208147427, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "14022681_12/step_000003200001", "id": "14022681_12"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 376.8571170536, "return_std": 262.8894081454, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cartpole_swingup", "policy_path": "14022681_12/step_000005000001", "id": "14022681_12"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 413.6688289014, "return_std": 259.2650988841, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "14022681_13/step_000000050001", "id": "14022681_13"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 469.6793715605, "return_std": 231.2537595855, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "14022681_13/step_000000100001", "id": "14022681_13"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 347.190815339, "return_std": 230.4161513597, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "14022681_13/step_000000200001", "id": "14022681_13"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 291.8505234002, "return_std": 190.4996030236, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "14022681_13/step_000000400001", "id": "14022681_13"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 292.7874004629, "return_std": 193.2934007361, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "14022681_13/step_000000800001", "id": "14022681_13"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 303.3654988258, "return_std": 203.7696740282, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "14022681_13/step_000001600001", "id": "14022681_13"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 275.4031355862, "return_std": 188.1873297707, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "14022681_13/step_000003200001", "id": "14022681_13"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 289.9700827394, "return_std": 195.9242951198, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "cheetah_run", "policy_path": "14022681_13/step_000005000001", "id": "14022681_13"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 353.755267486, "return_std": 213.7142314192, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "14022681_14/step_000000050001", "id": "14022681_14"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 367.4683288108, "return_std": 236.9957524063, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "14022681_14/step_000000100001", "id": "14022681_14"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 385.552686342, "return_std": 242.9848508574, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "14022681_14/step_000000200001", "id": "14022681_14"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 355.0252146824, "return_std": 220.0678179164, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "14022681_14/step_000000400001", "id": "14022681_14"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 337.29390143, "return_std": 196.5346119414, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "14022681_14/step_000000800001", "id": "14022681_14"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 325.8566484101, "return_std": 201.4874088913, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "14022681_14/step_000001600001", "id": "14022681_14"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 322.102428244, "return_std": 201.2188544363, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "14022681_14/step_000003200001", "id": "14022681_14"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 327.1344236838, "return_std": 205.7130202537, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "cheetah_run", "policy_path": "14022681_14/step_000005000001", "id": "14022681_14"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 402.5386358416, "return_std": 265.4397894909, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "14022681_15/step_000000050001", "id": "14022681_15"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 402.886123706, "return_std": 244.5412264205, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "14022681_15/step_000000100001", "id": "14022681_15"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 352.4913730698, "return_std": 224.8107561348, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "14022681_15/step_000000200001", "id": "14022681_15"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 314.9898558483, "return_std": 203.3902458193, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "14022681_15/step_000000400001", "id": "14022681_15"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 327.2299927419, "return_std": 211.4582749881, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "14022681_15/step_000000800001", "id": "14022681_15"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 323.6606890004, "return_std": 208.2724318438, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "14022681_15/step_000001600001", "id": "14022681_15"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 324.1134949509, "return_std": 208.4239328594, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "14022681_15/step_000003200001", "id": "14022681_15"}, {"snapshot_name": "step_000004950001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 293.3226185063, "return_std": 199.5988360618, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "cheetah_run", "policy_path": "14022681_15/step_000004950001", "id": "14022681_15"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 438.2240247319, "return_std": 282.3848326168, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "14022681_16/step_000000050001", "id": "14022681_16"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 474.3471549083, "return_std": 278.2252567595, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "14022681_16/step_000000100001", "id": "14022681_16"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 474.5202878539, "return_std": 281.2578474356, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "14022681_16/step_000000200001", "id": "14022681_16"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 472.6283594593, "return_std": 277.2351504747, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "14022681_16/step_000000400001", "id": "14022681_16"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 460.1820534729, "return_std": 275.4569274434, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "14022681_16/step_000000800001", "id": "14022681_16"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 466.7620775769, "return_std": 274.5030920804, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "14022681_16/step_000001600001", "id": "14022681_16"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 472.6906713344, "return_std": 265.5475278119, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "14022681_16/step_000003200001", "id": "14022681_16"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 456.037310074, "return_std": 271.638222877, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "fish_swim", "policy_path": "14022681_16/step_000005000001", "id": "14022681_16"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 411.8129123156, "return_std": 282.1826394178, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "14022681_17/step_000000050001", "id": "14022681_17"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 448.1045142725, "return_std": 281.8706043693, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "14022681_17/step_000000100001", "id": "14022681_17"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 470.3582370208, "return_std": 282.7197758483, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "14022681_17/step_000000200001", "id": "14022681_17"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 430.0943929109, "return_std": 271.0570195633, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "14022681_17/step_000000400001", "id": "14022681_17"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 448.5867483331, "return_std": 264.8088340841, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "14022681_17/step_000000800001", "id": "14022681_17"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 429.9388808745, "return_std": 272.4176105298, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "14022681_17/step_000001600001", "id": "14022681_17"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 430.9963066269, "return_std": 266.5920264651, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "14022681_17/step_000003200001", "id": "14022681_17"}, {"snapshot_name": "step_000004700001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 433.420754454, "return_std": 266.8390808372, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "fish_swim", "policy_path": "14022681_17/step_000004700001", "id": "14022681_17"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 425.4294812849, "return_std": 277.0162316221, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "14022681_18/step_000000050001", "id": "14022681_18"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 352.0171027394, "return_std": 276.9195711789, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "14022681_18/step_000000100001", "id": "14022681_18"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 83.3422686971, "return_std": 103.6347563079, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "14022681_18/step_000000200001", "id": "14022681_18"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 452.3246608611, "return_std": 271.8837435568, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "14022681_18/step_000000400001", "id": "14022681_18"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 435.6724704105, "return_std": 272.3283029428, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "14022681_18/step_000000800001", "id": "14022681_18"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 445.1658776105, "return_std": 274.2278865098, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "14022681_18/step_000001600001", "id": "14022681_18"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 422.6303401316, "return_std": 278.895351124, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "14022681_18/step_000003200001", "id": "14022681_18"}, {"snapshot_name": "step_000004950001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 478.3346265894, "return_std": 271.7029460564, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "fish_swim", "policy_path": "14022681_18/step_000004950001", "id": "14022681_18"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 114.3156047592, "return_std": 278.766745392, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_19/step_000000050001", "id": "14022681_19"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 274.2246122119, "return_std": 370.0689831265, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_19/step_000000100001", "id": "14022681_19"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 350.5075140603, "return_std": 371.1282769034, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_19/step_000000200001", "id": "14022681_19"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 350.9186305459, "return_std": 364.2958014964, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_19/step_000000400001", "id": "14022681_19"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 371.4460403674, "return_std": 371.4758548743, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_19/step_000000800001", "id": "14022681_19"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 343.5611559941, "return_std": 366.4730210821, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_19/step_000001600001", "id": "14022681_19"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 339.4676434293, "return_std": 356.2865176628, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_19/step_000003200001", "id": "14022681_19"}, {"snapshot_name": "step_000004200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 347.100374864, "return_std": 357.8757691776, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_19/step_000004200001", "id": "14022681_19"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 159.5817698656, "return_std": 313.873127589, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_20/step_000000050001", "id": "14022681_20"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 184.9009262596, "return_std": 327.6049874838, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_20/step_000000100001", "id": "14022681_20"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 377.1069827376, "return_std": 373.5542291962, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_20/step_000000200001", "id": "14022681_20"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 385.2878377044, "return_std": 369.3661466595, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_20/step_000000400001", "id": "14022681_20"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 369.8696890201, "return_std": 372.4144573819, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_20/step_000000800001", "id": "14022681_20"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 345.1927240582, "return_std": 360.1918019185, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_20/step_000001600001", "id": "14022681_20"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 347.7650398699, "return_std": 360.424373985, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_20/step_000003200001", "id": "14022681_20"}, {"snapshot_name": "step_000004250001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 338.7630658897, "return_std": 356.205962727, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_20/step_000004250001", "id": "14022681_20"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 92.6503858676, "return_std": 256.5435716116, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_21/step_000000050001", "id": "14022681_21"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 221.9454701583, "return_std": 343.6839690452, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_21/step_000000100001", "id": "14022681_21"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 320.2895119743, "return_std": 372.8390444715, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_21/step_000000200001", "id": "14022681_21"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 378.0131194018, "return_std": 374.573247722, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_21/step_000000400001", "id": "14022681_21"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 350.0607134556, "return_std": 359.7431143685, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_21/step_000000800001", "id": "14022681_21"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 347.4446169161, "return_std": 361.9169295324, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_21/step_000001600001", "id": "14022681_21"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 355.0065027253, "return_std": 363.7236526448, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_21/step_000003200001", "id": "14022681_21"}, {"snapshot_name": "step_000004650001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 343.4809140343, "return_std": 358.8006714763, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "manipulator_insert_ball", "policy_path": "14022681_21/step_000004650001", "id": "14022681_21"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 348.9339211087, "return_std": 235.8524655013, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "14022681_22/step_000000050001", "id": "14022681_22"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 385.3591533052, "return_std": 241.0014837898, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "14022681_22/step_000000100001", "id": "14022681_22"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 379.1865472203, "return_std": 246.1948266797, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "14022681_22/step_000000200001", "id": "14022681_22"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 361.5728403835, "return_std": 218.0579435421, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "14022681_22/step_000000400001", "id": "14022681_22"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 370.5203509735, "return_std": 213.9315574909, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "14022681_22/step_000000800001", "id": "14022681_22"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 358.8373364211, "return_std": 204.9118769042, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "14022681_22/step_000001600001", "id": "14022681_22"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 354.1022479384, "return_std": 213.7335990673, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "14022681_22/step_000003200001", "id": "14022681_22"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 355.8353644931, "return_std": 216.9792386907, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_stand", "policy_path": "14022681_22/step_000005000001", "id": "14022681_22"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 343.723182233, "return_std": 220.235701503, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "14022681_23/step_000000050001", "id": "14022681_23"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 370.5729032919, "return_std": 231.952680338, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "14022681_23/step_000000100001", "id": "14022681_23"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 370.7579892381, "return_std": 241.1425761913, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "14022681_23/step_000000200001", "id": "14022681_23"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 356.5237047632, "return_std": 235.4850118127, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "14022681_23/step_000000400001", "id": "14022681_23"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 342.4633676039, "return_std": 218.7813062313, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "14022681_23/step_000000800001", "id": "14022681_23"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 366.7936809245, "return_std": 232.7537441648, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "14022681_23/step_000001600001", "id": "14022681_23"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 355.3373480571, "return_std": 211.3875973619, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "14022681_23/step_000003200001", "id": "14022681_23"}, {"snapshot_name": "step_000004850001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 355.3363761274, "return_std": 211.63574523, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_stand", "policy_path": "14022681_23/step_000004850001", "id": "14022681_23"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 340.6047175206, "return_std": 220.0734199673, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "14022681_24/step_000000050001", "id": "14022681_24"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 354.8293346708, "return_std": 228.1905168036, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "14022681_24/step_000000100001", "id": "14022681_24"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 372.7119586975, "return_std": 226.4923676957, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "14022681_24/step_000000200001", "id": "14022681_24"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 363.8285506327, "return_std": 226.1560787625, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "14022681_24/step_000000400001", "id": "14022681_24"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 359.1702772495, "return_std": 224.8850251743, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "14022681_24/step_000000800001", "id": "14022681_24"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 365.7702159562, "return_std": 215.9858993162, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "14022681_24/step_000001600001", "id": "14022681_24"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 359.9994025101, "return_std": 222.9923268694, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "14022681_24/step_000003200001", "id": "14022681_24"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 354.8191977009, "return_std": 211.5660037508, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_stand", "policy_path": "14022681_24/step_000005000001", "id": "14022681_24"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 400.1665972447, "return_std": 344.2075633718, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "14022681_25/step_000000050001", "id": "14022681_25"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 401.8514109344, "return_std": 364.632307978, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "14022681_25/step_000000100001", "id": "14022681_25"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 260.3753995477, "return_std": 297.3574124105, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "14022681_25/step_000000200001", "id": "14022681_25"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 425.0074231406, "return_std": 331.4889619824, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "14022681_25/step_000000400001", "id": "14022681_25"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 414.236988735, "return_std": 325.2973225729, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "14022681_25/step_000000800001", "id": "14022681_25"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 461.4605482433, "return_std": 328.1094966247, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "14022681_25/step_000001600001", "id": "14022681_25"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 447.7521551267, "return_std": 330.2886689514, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "14022681_25/step_000003200001", "id": "14022681_25"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 430.3160841529, "return_std": 332.3736613296, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 0, "task.task_name": "walker_walk", "policy_path": "14022681_25/step_000005000001", "id": "14022681_25"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 417.9109923345, "return_std": 360.7403156163, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "14022681_26/step_000000050001", "id": "14022681_26"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 348.2330125314, "return_std": 296.408778544, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "14022681_26/step_000000100001", "id": "14022681_26"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 359.2186749746, "return_std": 294.1020025844, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "14022681_26/step_000000200001", "id": "14022681_26"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 419.807439272, "return_std": 322.3381418298, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "14022681_26/step_000000400001", "id": "14022681_26"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 392.5242276526, "return_std": 310.1605779408, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "14022681_26/step_000000800001", "id": "14022681_26"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 374.2550851204, "return_std": 306.3644240308, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "14022681_26/step_000001600001", "id": "14022681_26"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 378.8686222688, "return_std": 314.2303099076, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "14022681_26/step_000003200001", "id": "14022681_26"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 361.4339105096, "return_std": 308.2747215802, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 1, "task.task_name": "walker_walk", "policy_path": "14022681_26/step_000005000001", "id": "14022681_26"}, {"snapshot_name": "step_000000050001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 342.0456488793, "return_std": 301.8483108885, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "14022681_27/step_000000050001", "id": "14022681_27"}, {"snapshot_name": "step_000000100001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 421.6269802051, "return_std": 328.5092780896, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "14022681_27/step_000000100001", "id": "14022681_27"}, {"snapshot_name": "step_000000200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 365.9863302841, "return_std": 315.7222856417, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "14022681_27/step_000000200001", "id": "14022681_27"}, {"snapshot_name": "step_000000400001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 372.6607234372, "return_std": 304.5572257287, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "14022681_27/step_000000400001", "id": "14022681_27"}, {"snapshot_name": "step_000000800001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 360.2948928221, "return_std": 298.1634258655, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "14022681_27/step_000000800001", "id": "14022681_27"}, {"snapshot_name": "step_000001600001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 368.7257401101, "return_std": 314.6414091035, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "14022681_27/step_000001600001", "id": "14022681_27"}, {"snapshot_name": "step_000003200001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 359.3116317009, "return_std": 313.3448276232, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "14022681_27/step_000003200001", "id": "14022681_27"}, {"snapshot_name": "step_000005000001", "cwp": false, "is_recurrent": true, "agent_name": "BC", "return_mean": 382.3455346348, "return_std": 321.960982419, "agent.batch_size": 1024, "agent.discount": NaN, "agent.learning_rate": NaN, "agent.policy_comparison_modes": "mean", "agent.policy_improvement_modes": "all", "agent.truncate_on_evaluation": 1.0, "networks.critic_lstm_sizes": [], "networks.do_z_clipping": NaN, "networks.perturbation_factor": NaN, "networks.policy_lstm_sizes": [], "seed": 2, "task.task_name": "walker_walk", "policy_path": "14022681_27/step_000005000001", "id": "14022681_27"}]