haosulab · StoneT2000 · Aug 29, 2024 · Aug 29, 2024 · Aug 29, 2024 · Aug 29, 2024
diff --git a/docs/source/tasks/control/index.md b/docs/source/tasks/control/index.md
@@ -89,6 +89,44 @@ Hopper robot stands upright
 - No specific success conditions. We can threshold the episode accumulated reward to determine success.
 :::
 
+## MS-AntWalk-v1
+![dense-reward][reward-badge]
+
+:::{dropdown} Task Card
+:icon: note
+:color: primary
+
+**Task Description:**
+Ant moves in the x direction at 0.5 m/s
+
+**Supported Robots: ant**
+
+**Randomizations:**
+- Ant qpos and qvel have added noise from uniform distribution [-1e-2, 1e-2]
+
+**Success Conditions:**
+- No specific success conditions. The task is considered successful if the ant walks at 0.5 m/s for the whole episode. We can threshold the episode accumulated reward to determine success.
+:::
+
+## MS-AntRun-v1
+![dense-reward][reward-badge]
+
+:::{dropdown} Task Card
+:icon: note
+:color: primary
+
+**Task Description:**
+Ant moves in the x direction at 4 m/s
+
+**Supported Robots: ant**
+
+**Randomizations:**
+- Ant qpos and qvel have added noise from uniform distribution [-1e-2, 1e-2]
+
+**Success Conditions:**
+- No specific success conditions. The task is considered successful if the ant walks at 4 m/s for the whole episode. We can threshold the episode accumulated reward to determine success.
+:::
+
 ## MS-HumanoidStand-v1
 ![dense-reward][reward-badge]
 

diff --git a/examples/baselines/ppo/examples.sh b/examples/baselines/ppo/examples.sh
@@ -49,17 +49,21 @@ python ppo.py --env_id="MS-CartpoleSwingUp-v1" \
    --total_timesteps=10_000_000 --num-steps=250 --num-eval-steps=1000 \
    --gamma=0.99 --gae_lambda=0.95 \
    --eval_freq=5
-python ppo.py --env_id="MS-HumanoidStand-v1" --num_envs=2048 \
+python ppo.py --env_id="MS-AntWalk-v1" --num_envs=2048 --eval_freq=10 \
+  --update_epochs=8 --num_minibatches=32 --total_timesteps=20_000_000 \
+  --num_eval_steps=1000 --num_steps=200 --gamma=0.97 --ent_coef=1e-3
+python ppo.py --env_id="MS-AntRun-v1" --num_envs=2048 --eval_freq=10 \
+  --update_epochs=8 --num_minibatches=32 --total_timesteps=20_000_000 \
+  --num_eval_steps=1000 --num_steps=200 --gamma=0.97 --ent_coef=1e-3
+python ppo.py --env_id="MS-HumanoidStand-v1" --num_envs=2048 --eval_freq=10 \
   --update_epochs=8 --num_minibatches=32 --total_timesteps=40_000_000 \
-  --eval_freq=10 --num_eval_steps=1000 --num_steps=200 --gamma=0.95
-python ppo.py --env_id="MS-HumanoidWalk-v1" --num_envs=2048 \
+  --num_eval_steps=1000 --num_steps=200 --gamma=0.95
+python ppo.py --env_id="MS-HumanoidWalk-v1" --num_envs=2048 --eval_freq=10 \
   --update_epochs=8 --num_minibatches=32 --total_timesteps=80_000_000 \
-  --eval_freq=10 --num_eval_steps=1000 --num_steps=200 --gamma=0.97 \
-  --ent_coef=1e-3
-python ppo.py --env_id="MS-HumanoidRun-v1" --num_envs=2048 \
+  --num_eval_steps=1000 --num_steps=200 --gamma=0.97 --ent_coef=1e-3
+python ppo.py --env_id="MS-HumanoidRun-v1" --num_envs=2048 --eval_freq=10 \
   --update_epochs=8 --num_minibatches=32 --total_timesteps=60_000_000 \
-  --eval_freq=10 --num_eval_steps=1000 --num_steps=200 --gamma=0.97 \
-  --ent_coef=1e-3
+  --num_eval_steps=1000 --num_steps=200 --gamma=0.97 --ent_coef=1e-3
 python ppo.py --env_id="UnitreeG1PlaceAppleInBowl-v1" \
   --num_envs=512 --update_epochs=8 --num_minibatches=32 \
   --total_timesteps=50_000_000 --num-steps=100 --num-eval-steps=100
@@ -109,6 +113,16 @@ python ppo_rgb.py --env_id="PickSingleYCB-v1" \
 python ppo_rgb.py --env_id="PushT-v1" \
   --num_envs=256 --update_epochs=8 --num_minibatches=8 \
   --total_timesteps=25_000_000 --num-steps=100 --num_eval_steps=100 --gamma=0.99
+python ppo_rgb.py --env_id="MS-AntWalk-v1" \
+ --num_envs=256 --update_epochs=8 --num_minibatches=32 \
+ --total_timesteps=5_000_000 --eval_freq=15 --num_eval_steps=1000 \
+ --num_steps=200 --gamma=0.97 --no-include-state --render_mode="rgb_array" \
+ --ent_coef=1e-3
+python ppo_rgb.py --env_id="MS-AntRun-v1" \
+ --num_envs=256 --update_epochs=8 --num_minibatches=32 \
+ --total_timesteps=15_000_000 --eval_freq=15 --num_eval_steps=1000 \
+ --num_steps=200 --gamma=0.97 --no-include-state --render_mode="rgb_array" \
+ --ent_coef=1e-3
 python ppo_rgb.py --env_id="MS-HumanoidRun-v1" \
   --num_envs=256 --update_epochs=8 --num_minibatches=32 \
   --total_timesteps=80_000_000 --eval_freq=15 --num_eval_steps=1000 \

diff --git a/mani_skill/envs/tasks/control/__init__.py b/mani_skill/envs/tasks/control/__init__.py
@@ -1,3 +1,4 @@
 from .cartpole import CartpoleBalanceEnv, CartpoleSwingUpEnv
 from .hopper import HopperHopEnv, HopperStandEnv
 from .humanoid import HumanoidRun, HumanoidStand, HumanoidWalk
+from .ant import AntWalk, AntRun