-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathpusher3dof.py
123 lines (98 loc) · 4.52 KB
/
pusher3dof.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import os
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
import xml.etree.ElementTree as et
import mujoco_py
class PusherEnv3DofEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self, **kwargs):
utils.EzPickle.__init__(self)
self.reference_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
'assets/pusher_3dof.xml')
mujoco_env.MujocoEnv.__init__(self, self.reference_path, frame_skip=5)
self.model.stat.extent = 10
# randomization
self.reference_xml = et.parse(self.reference_path)
self.config_file = kwargs.get('config')
self.dimensions = []
self._locate_randomize_parameters()
def _locate_randomize_parameters(self):
self.root = self.reference_xml.getroot()
end_effector = self.root.find(".//body[@name='distal_4']")
self.wrist = end_effector.findall("./geom[@type='capsule']")
self.tips = end_effector.findall(".//body[@name='tips_arm']/geom")
self.object_body = self.root.find(".//body[@name='object']/geom")
self.object_joints = self.root.findall(".//body[@name='object']/joint")
def update_randomized_params(self):
xml = self._create_xml()
self._re_init(xml)
def _re_init(self, xml):
self.model = mujoco_py.load_model_from_xml(xml)
self.sim = mujoco_py.MjSim(self.model)
self.data = self.sim.data
self.init_qpos = self.data.qpos.ravel().copy()
self.init_qvel = self.data.qvel.ravel().copy()
observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
assert not done
if self.viewer:
self.viewer.update_sim(self.sim)
def _create_xml(self):
self._randomize_friction()
self._randomize_damping()
# self._randomize_size()
return et.tostring(self.root, encoding='unicode', method='xml')
def _randomize_friction(self):
frictionloss = self.dimensions[0].current_value
for joint in self.object_joints:
joint.set('frictionloss', '{:3f}'.format(frictionloss))
def _randomize_damping(self):
damping = self.dimensions[1].current_value
for joint in self.object_joints:
joint.set('damping', '{:3f}'.format(damping))
def _randomize_size(self):
size = self.dimensions[2].current_value
# grabber
grabber_width = size * 2
self.wrist[0].set('fromto', '0 -{:3f} 0. 0.0 +{:3f} 0'.format(grabber_width, grabber_width))
self.wrist[1].set('fromto', '0 -{:3f} 0. {:3f} -{:3f} 0'.format(grabber_width, grabber_width, grabber_width))
self.wrist[2].set('fromto', '0 +{:3f} 0. {:3f} +{:3f} 0'.format(grabber_width, grabber_width, grabber_width))
self.tips[0].set('pos', '{:3f} -{:3f} 0.'.format(grabber_width, grabber_width))
self.tips[1].set('pos', '{:3f} {:3f} 0.'.format(grabber_width, grabber_width))
def step(self, action):
arm_dist = np.linalg.norm(self.get_body_com("object")[:2] - self.get_body_com("tips_arm")[:2])
goal_dist = np.linalg.norm(self.get_body_com("object")[:2] - self.get_body_com("goal")[:2])
action_cost = np.square(action).sum()
reward = -0.1 * action_cost - goal_dist
self.do_simulation(action, self.frame_skip)
ob = self._get_obs()
done = False
return ob, reward, done, {'arm_dist': arm_dist, 'goal_dist': goal_dist}
def viewer_setup(self):
coords = [.7, -.5, 0]
for i in range(3):
self.viewer.cam.lookat[i] = coords[i]
self.viewer.cam.trackbodyid = -1
self.viewer.cam.distance = 2
def reset_model(self):
qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
while True:
object_ = np.random.uniform(low=[.4,-1.0], high=[1.2,-0.5])
goal = np.random.uniform(low=[.8,-1.2], high=[1.2,-0.8])
if np.linalg.norm(object_ - goal) > 0.45:
break
self.object = np.array(object_)
self.goal = np.array(goal)
qpos[-4:-2] = self.object
qpos[-2:] = self.goal
qvel = self.init_qvel
qvel[-4:] = 0
self.set_state(qpos, qvel)
return self._get_obs()
def _get_obs(self):
return np.concatenate([
self.sim.data.qpos.flat[:-4],
self.sim.data.qvel.flat[:-4],
self.get_body_com("distal_4")[:2],
self.get_body_com("object")[:2],
self.get_body_com("goal")[:2],
])