Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Load model #44

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions examples/advanced/create2_docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import time
import sys
import copy
import numpy as np
import pickle as pkl
import baselines.common.tf_util as U

import senseact.devices.create2.create2_config as create2_config
Expand All @@ -20,6 +22,14 @@


def main():
# optionally use a pretrained model
load_model_data = None
hidden_sizes = (32, 32)
if len(sys.argv) > 1:
load_model_path = sys.argv[1]
load_model_data = pkl.load(open(load_model_path, 'rb'))
hidden_sizes = load_model_data['hidden_sizes']

# use fixed random state
rand_state = np.random.RandomState(1).get_state()
np.random.set_state(rand_state)
Expand All @@ -38,7 +48,7 @@ def main():
sess.__enter__()
def policy_fn(name, ob_space, ac_space):
return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
hid_size=32, num_hid_layers=2)
hid_size=hidden_sizes[0], num_hid_layers=len(hidden_sizes))

# Create and start plotting process
plot_running = Value('i', 1)
Expand All @@ -50,7 +60,7 @@ def policy_fn(name, ob_space, ac_space):
pp.start()

# Create callback function for logging data from baselines TRPO learn
kindred_callback = create_callback(shared_returns)
kindred_callback = create_callback(shared_returns, load_model_data)

# Train baselines TRPO
learn(env, policy_fn,
Expand Down
15 changes: 13 additions & 2 deletions examples/advanced/create2_mover.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
# LICENSE file in the root directory of this source tree.
import time
import copy
import sys
import numpy as np
import pickle as pkl
import baselines.common.tf_util as U

import senseact.devices.create2.create2_config as create2_config
Expand All @@ -20,6 +22,14 @@


def main():
# optionally use a pretrained model
load_model_data = None
hidden_sizes = (32, 32)
if len(sys.argv) > 1:
load_model_path = sys.argv[1]
load_model_data = pkl.load(open(load_model_path, 'rb'))
hidden_sizes = load_model_data['hidden_sizes']

# use fixed random state
rand_state = np.random.RandomState(1).get_state()
np.random.set_state(rand_state)
Expand All @@ -35,9 +45,10 @@ def main():
# Create baselines TRPO policy function
sess = U.single_threaded_session()
sess.__enter__()

def policy_fn(name, ob_space, ac_space):
return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
hid_size=32, num_hid_layers=2)
hid_size=hidden_sizes[0], num_hid_layers=len(hidden_sizes))

# Create and start plotting process
plot_running = Value('i', 1)
Expand All @@ -49,7 +60,7 @@ def policy_fn(name, ob_space, ac_space):
pp.start()

# Create callback function for logging data from baselines TRPO learn
kindred_callback = create_callback(shared_returns)
kindred_callback = create_callback(shared_returns, load_model_data)

# Train baselines TRPO
learn(env, policy_fn,
Expand Down
16 changes: 12 additions & 4 deletions examples/advanced/dxl_reacher.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import sys
import time
import copy

import numpy as np
import baselines.common.tf_util as U
import pickle as pkl

import baselines.common.tf_util as U
from baselines.trpo_mpi.trpo_mpi import learn
from baselines.ppo1.mlp_policy import MlpPolicy
from senseact.envs.dxl.dxl_reacher_env import DxlReacher1DEnv
Expand All @@ -17,6 +18,13 @@
from helper import create_callback

def main():
load_model_data = None
hidden_sizes = (32, 32)
if len(sys.argv) > 1:
load_model_path = sys.argv[1]
load_model_data = pkl.load(open(load_model_path, 'rb'))
hidden_sizes = load_model_data['hidden_sizes']

# use fixed random state
rand_state = np.random.RandomState(1).get_state()
np.random.set_state(rand_state)
Expand Down Expand Up @@ -54,7 +62,7 @@ def main():
sess.__enter__()
def policy_fn(name, ob_space, ac_space):
return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
hid_size=32, num_hid_layers=2)
hid_size=hidden_sizes[0], num_hid_layers=len(hidden_sizes))

# create and start plotting process
plot_running = Value('i', 1)
Expand All @@ -66,7 +74,7 @@ def policy_fn(name, ob_space, ac_space):
pp.start()

# Create callback function for logging data from baselines TRPO learn
kindred_callback = create_callback(shared_returns)
kindred_callback = create_callback(shared_returns, load_model_data)

# Train baselines TRPO
learn(env, policy_fn,
Expand Down
16 changes: 12 additions & 4 deletions examples/advanced/dxl_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import sys
import time
import copy

import numpy as np
import baselines.common.tf_util as U
import pickle as pkl

import baselines.common.tf_util as U
from baselines.trpo_mpi.trpo_mpi import learn
from baselines.ppo1.mlp_policy import MlpPolicy
from senseact.envs.dxl.dxl_tracker_env import DxlTracker1DEnv
Expand All @@ -17,6 +18,13 @@
from helper import create_callback

def main():
load_model_data = None
hidden_sizes = (32, 32)
if len(sys.argv) > 1:
load_model_path = sys.argv[1]
load_model_data = pkl.load(open(load_model_path, 'rb'))
hidden_sizes = load_model_data['hidden_sizes']

# use fixed random state
rand_state = np.random.RandomState(1).get_state()
np.random.set_state(rand_state)
Expand Down Expand Up @@ -54,7 +62,7 @@ def main():
sess.__enter__()
def policy_fn(name, ob_space, ac_space):
return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
hid_size=32, num_hid_layers=2)
hid_size=hidden_sizes[0], num_hid_layers=len(hidden_sizes))

# create and start plotting process
plot_running = Value('i', 1)
Expand All @@ -66,7 +74,7 @@ def policy_fn(name, ob_space, ac_space):
pp.start()

# Create callback function for logging data from baselines TRPO learn
kindred_callback = create_callback(shared_returns)
kindred_callback = create_callback(shared_returns, load_model_data)

# Train baselines TRPO
learn(env, policy_fn,
Expand Down
29 changes: 27 additions & 2 deletions examples/advanced/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,21 @@
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import os
import builtins
import tempfile, zipfile

def create_callback(shared_returns):

def create_callback(shared_returns, load_model_data=None):
builtins.shared_returns = shared_returns
builtins.load_model_data = load_model_data

def kindred_callback(locals, globals):
shared_returns = globals['__builtins__']['shared_returns']
if locals['iters_so_far'] > 0:
if locals['iters_so_far'] == 0:
if globals['__builtins__']['load_model_data'] is not None:
tf_load_session_from_pickled_model(globals['__builtins__']['load_model_data'])
else:
ep_rets = locals['seg']['ep_rets']
ep_lens = locals['seg']['ep_lens']
if len(ep_rets):
Expand All @@ -20,3 +27,21 @@ def kindred_callback(locals, globals):
shared_returns['episodic_lengths'] += ep_lens
shared_returns['write_lock'] = False
return kindred_callback


def tf_load_session_from_pickled_model(load_model_data):
    """Restore the default tensorflow session from pickled model data.

    :param load_model_data: dict whose 'model' entry holds the raw bytes of
        a zip archive containing a tensorflow checkpoint whose files share
        the prefix "model".
    :return: None. The variables of the current default tensorflow session
        are restored in place.
    """
    # Imported lazily so merely importing this helper module does not
    # require tensorflow to be installed.
    import tensorflow as tf
    with tempfile.TemporaryDirectory() as td:
        # Materialize the archive bytes on disk so zipfile can read them.
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(load_model_data['model'])

        # Use a context manager so the archive handle is closed instead of
        # leaked; a compression argument is irrelevant when reading.
        with zipfile.ZipFile(arc_path, 'r') as archive:
            archive.extractall(td)

        # Restore all variables of the default session from the extracted
        # checkpoint (saver path prefix "model").
        saver = tf.train.Saver()
        saver.restore(tf.get_default_session(), os.path.join(td, "model"))
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
14 changes: 12 additions & 2 deletions examples/advanced/ur5_reacher.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import sys
import time
import copy
import numpy as np
import pickle as pkl

import baselines.common.tf_util as U
from multiprocessing import Process, Value, Manager
Expand All @@ -18,6 +20,14 @@


def main():
# optionally use a pretrained model
load_model_data = None
hidden_sizes = (32, 32)
if len(sys.argv) > 1:
load_model_path = sys.argv[1]
load_model_data = pkl.load(open(load_model_path, 'rb'))
hidden_sizes = load_model_data['hidden_sizes']

# use fixed random state
rand_state = np.random.RandomState(1).get_state()
np.random.set_state(rand_state)
Expand Down Expand Up @@ -56,7 +66,7 @@ def main():
sess.__enter__()
def policy_fn(name, ob_space, ac_space):
return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
hid_size=32, num_hid_layers=2)
hid_size=hidden_sizes[0], num_hid_layers=len(hidden_sizes))

# Create and start plotting process
plot_running = Value('i', 1)
Expand All @@ -68,7 +78,7 @@ def policy_fn(name, ob_space, ac_space):
pp.start()

# Create callback function for logging data from baselines TRPO learn
kindred_callback = create_callback(shared_returns)
kindred_callback = create_callback(shared_returns, load_model_data)

# Train baselines TRPO
learn(env, policy_fn,
Expand Down
14 changes: 12 additions & 2 deletions examples/advanced/ur5_reacher_6D.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import sys
import time
import copy
import numpy as np
import pickle as pkl

import baselines.common.tf_util as U
from multiprocessing import Process, Value, Manager
Expand All @@ -18,6 +20,14 @@


def main():
# optionally use a pretrained model
load_model_data = None
hidden_sizes = (64, 64)
if len(sys.argv) > 1:
load_model_path = sys.argv[1]
load_model_data = pkl.load(open(load_model_path, 'rb'))
hidden_sizes = load_model_data['hidden_sizes']

# use fixed random state
rand_state = np.random.RandomState(1).get_state()
np.random.set_state(rand_state)
Expand Down Expand Up @@ -56,7 +66,7 @@ def main():
sess.__enter__()
def policy_fn(name, ob_space, ac_space):
return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
hid_size=64, num_hid_layers=2)
hid_size=hidden_sizes[0], num_hid_layers=len(hidden_sizes))

# Create and start plotting process
plot_running = Value('i', 1)
Expand All @@ -68,7 +78,7 @@ def policy_fn(name, ob_space, ac_space):
pp.start()

# Create callback function for logging data from baselines TRPO learn
kindred_callback = create_callback(shared_returns)
kindred_callback = create_callback(shared_returns, load_model_data)

# Train baselines TRPO
learn(env, policy_fn,
Expand Down