Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Algorithm: Option Critic methods #278

Draft
wants to merge 22 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
3b49f85
[WIP] OC
DavidSlayback Sep 23, 2022
30e4928
[WIP] dqoc
DavidSlayback Sep 24, 2022
ca98f83
Getting close. Need to figure out the terminal observation thing ion …
DavidSlayback Sep 26, 2022
972aeed
Welp, it runs! Still having issues with timelimit truncation
DavidSlayback Sep 26, 2022
3ca34dd
Switch to gym builtins. Just need a replay buffer
DavidSlayback Sep 26, 2022
adff9bc
Runs all the way through, needs tests
DavidSlayback Sep 27, 2022
1904daf
A2OC WIP
DavidSlayback Sep 27, 2022
d5624df
Runs through, need to verify the sign
DavidSlayback Sep 27, 2022
57073f6
[WIP] Hoc...tons of notes not here
DavidSlayback Sep 28, 2022
5178eea
[WIP] PPOC
DavidSlayback Sep 29, 2022
f9e87d6
Thinking
DavidSlayback Sep 29, 2022
7fb5f6a
Notes on errors
DavidSlayback Sep 30, 2022
6b981d2
Getting there
DavidSlayback Oct 2, 2022
6c8d07e
My version works, now to lobotomize it into the original form!
DavidSlayback Oct 3, 2022
99730da
Factor termination into inter-option policy loss
DavidSlayback Oct 3, 2022
9707f6c
Merge branch 'vwxyzjn:master' into algorithm/oc
DavidSlayback Oct 3, 2022
9d3526c
Merge branch 'vwxyzjn:master' into algorithm/oc
DavidSlayback Oct 4, 2022
ec50907
Merge branch 'vwxyzjn:master' into algorithm/oc
DavidSlayback Oct 6, 2022
4bdecf9
Merge branch 'vwxyzjn:master' into algorithm/oc
DavidSlayback Oct 14, 2022
15fdfb3
Recent updates
DavidSlayback Oct 15, 2022
d175e39
Additional algorithms
DavidSlayback Oct 16, 2022
5d5fbcb
Merge branch 'vwxyzjn:master' into algorithm/oc
DavidSlayback Oct 20, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
418 changes: 418 additions & 0 deletions cleanrl/oc/a2oc_atari_envpool.py

Large diffs are not rendered by default.

433 changes: 433 additions & 0 deletions cleanrl/oc/a2oc_atari_envpool_original.py

Large diffs are not rendered by default.

49 changes: 49 additions & 0 deletions cleanrl/oc/assets/hopper_icewall_1.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<mujoco model="hopper">
  <!--
    Planar hopper: a torso / thigh / leg / foot chain of capsules standing on a
    plane, plus one static box obstacle named "wall" placed ahead of the robot
    along +x.
  -->

  <!-- Angles are given in degrees, positions in the global frame, and body
       inertias are derived from the attached geoms. -->
  <compiler angle="degree" coordinate="global" inertiafromgeom="true"/>

  <!-- Values inherited by every joint / geom / motor that does not override them. -->
  <default>
    <joint limited="true" armature="1" damping="1"/>
    <geom material="geom"
          rgba="0.8 0.6 .4 1"
          contype="1"
          conaffinity="1"
          condim="1"
          margin="0.001"
          solimp=".8 .8 .01"
          solref=".02 1"/>
    <!-- Every motor in <actuator> below sets its own ctrlrange, so this range
         is only a fallback. -->
    <motor ctrllimited="true" ctrlrange="-.4 .4"/>
  </default>

  <option integrator="RK4" timestep="0.002"/>

  <visual>
    <map znear="0.02"/>
  </visual>

  <worldbody>
    <light directional="true"
           pos="0 0 1.3"
           dir="-0 0 -1.3"
           cutoff="100"
           exponent="1"
           diffuse="1 1 1"
           specular=".1 .1 .1"/>

    <!-- Ground plane. -->
    <geom name="floor"
          type="plane"
          material="MatPlane"
          pos="0 0 0"
          size="20 20 .125"
          rgba="0.8 0.9 0.8 1"
          conaffinity="1"
          condim="3"/>

    <!-- Obstacle: a box attached directly to the world body, centred at x = 2.8. -->
    <geom name="wall"
          type="box"
          pos="2.8 0 0.06"
          size="0.2 0.4 0.12"
          rgba="1. 0. 1. 1"
          density="0.00001"/>

    <body name="torso" pos="0 0 1.25">
      <camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>

      <!-- Root joints: unlimited slides along x and z and a hinge about y, all
           with zero armature, damping and stiffness. -->
      <joint name="rootx"
             type="slide"
             axis="1 0 0"
             pos="0 0 0"
             limited="false"
             armature="0"
             damping="0"
             stiffness="0"/>
      <joint name="rootz"
             type="slide"
             axis="0 0 1"
             pos="0 0 0"
             ref="1.25"
             limited="false"
             armature="0"
             damping="0"
             stiffness="0"/>
      <joint name="rooty"
             type="hinge"
             axis="0 1 0"
             pos="0 0 1.25"
             limited="false"
             armature="0"
             damping="0"
             stiffness="0"/>
      <geom name="torso_geom"
            type="capsule"
            fromto="0 0 1.45 0 0 1.05"
            size="0.05"
            friction="0.9"/>

      <body name="thigh" pos="0 0 1.05">
        <joint name="thigh_joint"
               type="hinge"
               axis="0 -1 0"
               pos="0 0 1.05"
               range="-150 0"/>
        <geom name="thigh_geom"
              type="capsule"
              fromto="0 0 1.05 0 0 0.6"
              size="0.05"
              friction="0.9"/>

        <body name="leg" pos="0 0 0.35">
          <joint name="leg_joint"
                 type="hinge"
                 axis="0 -1 0"
                 pos="0 0 0.6"
                 range="-150 0"/>
          <geom name="leg_geom"
                type="capsule"
                fromto="0 0 0.6 0 0 0.1"
                size="0.04"
                friction="0.9"/>

          <body name="foot" pos="0.13 0 0">
            <joint name="foot_joint"
                   type="hinge"
                   axis="0 -1 0"
                   pos="0 0 0.1"
                   range="-45 45"/>
            <!-- The foot has a higher friction coefficient than the other links. -->
            <geom name="foot_geom"
                  type="capsule"
                  fromto="-0.13 0 0.1 0.26 0 0.1"
                  size="0.06"
                  friction="2.0"/>
          </body>
        </body>
      </body>
    </body>
  </worldbody>

  <!-- One motor per actuated hinge; controls are clamped to [-1, 1] and scaled
       by gear 200. -->
  <actuator>
    <motor joint="thigh_joint" gear="200.0" ctrllimited="true" ctrlrange="-1.0 1.0"/>
    <motor joint="leg_joint" gear="200.0" ctrllimited="true" ctrlrange="-1.0 1.0"/>
    <motor joint="foot_joint" gear="200.0" ctrllimited="true" ctrlrange="-1.0 1.0"/>
  </actuator>

  <!-- Textures and materials referenced by the geoms above. -->
  <asset>
    <texture type="skybox"
             builtin="gradient"
             rgb1=".4 .5 .6"
             rgb2="0 0 0"
             width="100"
             height="100"/>
    <texture name="texgeom"
             type="cube"
             builtin="flat"
             mark="cross"
             markrgb="1 1 1"
             random="0.01"
             rgb1="0.8 0.6 0.4"
             rgb2="0.8 0.6 0.4"
             width="127"
             height="1278"/>
    <texture name="texplane"
             type="2d"
             builtin="checker"
             rgb1="0 0 0"
             rgb2="0.8 0.8 0.8"
             width="100"
             height="100"/>
    <material name="MatPlane"
              texture="texplane"
              texrepeat="60 60"
              reflectance="0.5"
              shininess="1"
              specular="1"/>
    <material name="geom" texture="texgeom" texuniform="true"/>
  </asset>
</mujoco>
51 changes: 51 additions & 0 deletions cleanrl/oc/assets/tmaze.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
<mujoco model="twod_point">
  <!--
    T-shaped maze: a point mass driven by two slide joints (x and y) inside a
    capsule-walled arena, with one target sphere in each arm of the T.
  -->

  <!-- Angles in radians, positions relative to the parent frame, inertias
       derived from geoms. -->
  <compiler angle="radian" coordinate="local" inertiafromgeom="true"/>

  <!-- Gravity is zeroed out. -->
  <option integrator="Euler" timestep="0.01" iterations="20" gravity="0 0 0"/>

  <!-- Values inherited by every joint / geom that does not override them. -->
  <default>
    <joint damping="1" limited="false"/>
    <geom contype="2"
          conaffinity="1"
          condim="1"
          density="1000"
          friction=".5 .1 .1"
          margin="0.002"/>
  </default>

  <worldbody>
    <light directional="true"
           pos="0 0 1.3"
           dir="-0 0 -1.3"
           cutoff="100"
           exponent="1"
           diffuse="1 1 1"
           specular=".1 .1 .1"/>

    <!-- Point mass: a blue sphere that translates along x and y. Its contype is
         overridden to 1 while conaffinity stays at the default of 1. -->
    <body name="particle" pos="0. -0.1 0">
      <geom name="particle_geom"
            type="sphere"
            size="0.03"
            contype="1"
            rgba="0.0 0.0 1.0 1"/>
      <site name="particle_site" pos="0 0 0" size="0.01"/>
      <joint name="ball_x" type="slide" axis="1 0 0" pos="0 0 0"/>
      <joint name="ball_y" type="slide" axis="0 1 0" pos="0 0 0"/>
    </body>

    <!-- Arena walls: capsules attached to the world body. -->

    <!-- Stem of the T: south end cap plus east and west sides. -->
    <geom name="sideS"
          type="capsule"
          fromto="-.1 -.2 .01 .1 -.2 .01"
          size=".02"
          conaffinity="1"
          rgba="0.9 0.4 0.6 1"/>
    <geom name="sideE"
          type="capsule"
          fromto=" .1 -.2 .01 .1 .2 .01"
          size=".02"
          conaffinity="1"
          rgba="0.9 0.4 0.6 1"/>
    <geom name="sideW"
          type="capsule"
          fromto=" -.1 -.2 .01 -.1 .2 .01"
          size=".02"
          conaffinity="1"
          rgba="0.9 0.4 0.6 1"/>

    <!-- Underside of the crossbar, on either side of the stem. -->
    <geom name="sideEbottom"
          type="capsule"
          fromto=" .1 .2 .01 .4 .2 .01"
          size=".02"
          conaffinity="1"
          rgba="0.9 0.4 0.6 1"/>
    <geom name="sideWbottom"
          type="capsule"
          fromto=" -.1 .2 .01 -.4 .2 .01"
          size=".02"
          conaffinity="1"
          rgba="0.9 0.4 0.6 1"/>

    <!-- East and west ends of the crossbar. -->
    <geom name="sideEside"
          type="capsule"
          fromto=" .4 .2 .01 .4 .45 .01"
          size=".02"
          conaffinity="1"
          rgba="0.9 0.4 0.6 1"/>
    <geom name="sideWside"
          type="capsule"
          fromto=" -.4 .2 .01 -.4 .45 .01"
          size=".02"
          conaffinity="1"
          rgba="0.9 0.4 0.6 1"/>

    <!-- Top of the crossbar. -->
    <geom name="sideN"
          type="capsule"
          fromto="-.4 .45 .01 .4 .45 .01"
          size=".02"
          conaffinity="1"
          rgba="0.9 0.4 0.6 1"/>

    <!-- Targets: green spheres in the left and right arms. They use
         conaffinity 2 with the default contype 2, so their bits do not overlap
         the particle's contype/conaffinity of 1; under MuJoCo's bitmask
         contact filter the particle should pass through them. -->
    <body name="targetl" pos="-0.3 0.3 0">
      <geom name="target_geoml"
            type="sphere"
            size="0.03"
            conaffinity="2"
            rgba="0 0.9 0.1 1"/>
    </body>
    <body name="targetr" pos="0.3 0.3 0">
      <geom name="target_geomr"
            type="sphere"
            size="0.03"
            conaffinity="2"
            rgba="0 0.9 0.1 1"/>
    </body>
  </worldbody>

  <!-- One motor per slide joint, with controls clamped to [-1, 1]. -->
  <actuator>
    <motor joint="ball_x" ctrllimited="true" ctrlrange="-1 1"/>
    <motor joint="ball_y" ctrllimited="true" ctrlrange="-1 1"/>
  </actuator>
</mujoco>
Loading