Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pretrained model for Create2 docker (TRPO) #43

Merged
Merged 1 commit on Nov 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions examples/advanced/create2_docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import time
import sys
import copy
import numpy as np
import pickle as pkl
import baselines.common.tf_util as U

import senseact.devices.create2.create2_config as create2_config
Expand All @@ -20,6 +22,14 @@


def main():
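# optionally use a pretrained model: if a command-line argument is given, it is
# the path to a pickled model file (only load pickles from a trusted source,
# since unpickling can execute arbitrary code)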
load_model_data = None
hidden_sizes = (32, 32)
if len(sys.argv) > 1:
load_model_path = sys.argv[1]
load_model_data = pkl.load(open(load_model_path, 'rb'))
hidden_sizes = load_model_data['hidden_sizes']

# use fixed random state
rand_state = np.random.RandomState(1).get_state()
np.random.set_state(rand_state)
Expand All @@ -38,7 +48,7 @@ def main():
sess.__enter__()
def policy_fn(name, ob_space, ac_space):
return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
hid_size=32, num_hid_layers=2)
hid_size=hidden_sizes[0], num_hid_layers=len(hidden_sizes))

# Create and start plotting process
plot_running = Value('i', 1)
Expand All @@ -50,7 +60,7 @@ def policy_fn(name, ob_space, ac_space):
pp.start()

# Create callback function for logging data from baselines TRPO learn
kindred_callback = create_callback(shared_returns)
kindred_callback = create_callback(shared_returns, load_model_data)

# Train baselines TRPO
learn(env, policy_fn,
Expand Down
15 changes: 13 additions & 2 deletions examples/advanced/create2_mover.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
# LICENSE file in the root directory of this source tree.
import time
import copy
import sys
import numpy as np
import pickle as pkl
import baselines.common.tf_util as U

import senseact.devices.create2.create2_config as create2_config
Expand All @@ -20,6 +22,14 @@


def main():
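# optionally use a pretrained model: if a command-line argument is given, it is
# the path to a pickled model file (only load pickles from a trusted source,
# since unpickling can execute arbitrary code)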
load_model_data = None
hidden_sizes = (32, 32)
if len(sys.argv) > 1:
load_model_path = sys.argv[1]
load_model_data = pkl.load(open(load_model_path, 'rb'))
hidden_sizes = load_model_data['hidden_sizes']

# use fixed random state
rand_state = np.random.RandomState(1).get_state()
np.random.set_state(rand_state)
Expand All @@ -35,9 +45,10 @@ def main():
# Create baselines TRPO policy function
sess = U.single_threaded_session()
sess.__enter__()

def policy_fn(name, ob_space, ac_space):
return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
hid_size=32, num_hid_layers=2)
hid_size=hidden_sizes[0], num_hid_layers=len(hidden_sizes))

# Create and start plotting process
plot_running = Value('i', 1)
Expand All @@ -49,7 +60,7 @@ def policy_fn(name, ob_space, ac_space):
pp.start()

# Create callback function for logging data from baselines TRPO learn
kindred_callback = create_callback(shared_returns)
kindred_callback = create_callback(shared_returns, load_model_data)

# Train baselines TRPO
learn(env, policy_fn,
Expand Down
Binary file not shown.
1 change: 1 addition & 0 deletions examples/advanced/ur5_reacher.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@


def main():
# optionally use a pretrained model
load_model_data = None
hidden_sizes = (32, 32)
if len(sys.argv) > 1:
Expand Down
14 changes: 12 additions & 2 deletions examples/advanced/ur5_reacher_6D.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import sys
import time
import copy
import numpy as np
import pickle as pkl

import baselines.common.tf_util as U
from multiprocessing import Process, Value, Manager
Expand All @@ -18,6 +20,14 @@


def main():
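# optionally use a pretrained model: if a command-line argument is given, it is
# the path to a pickled model file (only load pickles from a trusted source,
# since unpickling can execute arbitrary code)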
load_model_data = None
hidden_sizes = (64, 64)
if len(sys.argv) > 1:
load_model_path = sys.argv[1]
load_model_data = pkl.load(open(load_model_path, 'rb'))
hidden_sizes = load_model_data['hidden_sizes']

# use fixed random state
rand_state = np.random.RandomState(1).get_state()
np.random.set_state(rand_state)
Expand Down Expand Up @@ -56,7 +66,7 @@ def main():
sess.__enter__()
def policy_fn(name, ob_space, ac_space):
return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
hid_size=64, num_hid_layers=2)
hid_size=hidden_sizes[0], num_hid_layers=len(hidden_sizes))

# Create and start plotting process
plot_running = Value('i', 1)
Expand All @@ -68,7 +78,7 @@ def policy_fn(name, ob_space, ac_space):
pp.start()

# Create callback function for logging data from baselines TRPO learn
kindred_callback = create_callback(shared_returns)
kindred_callback = create_callback(shared_returns, load_model_data)

# Train baselines TRPO
learn(env, policy_fn,
Expand Down