Skip to content
Closed
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
90 commits
Select commit Hold shift + click to select a range
222bf13
keep_dims -> keepdims
alok May 2, 2018
a2ab3f7
WIP
alok May 2, 2018
7fc25c6
Get single sample
alok May 2, 2018
470d56a
add test scripts
alok May 2, 2018
19b6510
WIP
alok May 3, 2018
4a24483
Silence PyTorch warnings
alok May 3, 2018
4c52ede
Test A3C with only 1 worker
alok May 3, 2018
18b7692
Use PyTorch's new scalar support
alok May 3, 2018
6745a4b
Use F.mse_loss instead of rolling our own
alok May 3, 2018
aaabe16
Use correct samplebatch key
alok May 3, 2018
5e7fe40
Write magic methods for SampleBatch/PartialRollout
alok May 4, 2018
3d53186
WIP
alok May 4, 2018
19490f4
Fix IndentationError
alok May 6, 2018
0f0a17b
rm Variable for torch 0.4.0
alok May 8, 2018
1dade7c
misc
alok May 8, 2018
353cff9
Fix some shape errors in TRPO
alok May 8, 2018
0fd8f4a
Merge branch 'master' into trpo
alok May 8, 2018
06611c2
Use kl_divergence provided by PyTorch
alok May 8, 2018
06192a5
Use detach() over .data
alok May 8, 2018
bc3dca6
Rename variables
alok May 8, 2018
779fa5c
rm unnecessary `probs` attribute
alok May 8, 2018
2c46098
Fix rewards shape
alok May 9, 2018
0125eef
size -> out_size in SlimFC
alok May 9, 2018
a3ec08d
.size() -> .shape
alok May 9, 2018
6d45db2
Use chain to adjust only action head
alok May 9, 2018
eda75ae
WIP
alok May 9, 2018
8ba4c17
rm trailing comma
alok May 9, 2018
2d43fd0
Update test scripts
alok May 9, 2018
fec90b7
leave note to debug remote_evaluators
alok May 9, 2018
c2592fd
Use .item() to extract number from torch scalar
alok May 9, 2018
3fae65f
temporary fix to zero gradient
alok May 9, 2018
9601b69
fmt
alok May 9, 2018
dfd38c0
rm test scripts
alok May 9, 2018
280be28
fmt
alok May 11, 2018
2fdac3b
Undo lint changes
alok May 11, 2018
be5a5e5
Undo fmt
alok May 11, 2018
75e2d1e
Undo magic methods
alok May 11, 2018
e0da980
Merge branch 'master' into trpo
alok May 11, 2018
23535a8
Calculate remote evaluator only once
alok May 11, 2018
1ea501f
rm needless copy
alok May 11, 2018
82a6698
Merge branch 'master' into trpo
alok May 11, 2018
0151a0a
rm unnecessary copy
alok May 11, 2018
51901ad
Use F.softmax instead of a pointless network layer
alok May 11, 2018
5d7fc19
Use correct pytorch functions
alok May 11, 2018
8583616
Rename argument name to out_size
alok May 11, 2018
18c4a4c
Fix shapes of tensors
alok May 11, 2018
64ae2ab
Fmt
alok May 11, 2018
5b623c0
Register TRPO with other agents
alok May 12, 2018
3073e09
Drop use of numpy in TRPO _backward
alok May 12, 2018
85b5fbf
Re-add deepcopy as stopgap measure
alok May 12, 2018
8accdae
replace deprecated function
alok May 12, 2018
8645cd7
rm unnecessary Variable wrapper
alok May 14, 2018
12bd5d6
Clarify variable name
alok May 14, 2018
47e8ebd
rm all use of torch Variables
alok May 14, 2018
f9e4797
Merge branch 'master' into fix-a3c-torch
alok May 14, 2018
884a6a8
Ensure that values are flat list
alok May 14, 2018
7d1b205
Fix shape error in conv nets
alok May 14, 2018
8542341
Merge branch 'fix-a3c-torch' into trpo
alok May 15, 2018
72f2afc
rm unused import
alok May 15, 2018
2850279
rm unused functions
alok May 15, 2018
8999714
Handle partial rollouts
alok May 15, 2018
aeab1f3
Merge branch 'master' into fix-a3c-torch
alok May 17, 2018
662eaa5
Merge branch 'master' into fix-a3c-2
alok May 21, 2018
f9561d3
fmt
alok May 21, 2018
7f06a1f
Fix shape errors
alok May 24, 2018
0438707
Add TODO
alok May 24, 2018
da8d9e6
Use correct filter size
alok May 24, 2018
db9804d
Add missing channel major
alok May 24, 2018
9aac5bd
Merge branch 'fix-a3c-torch' into trpo
alok May 24, 2018
9246b65
Merge branch 'master' into trpo
alok May 24, 2018
e865a09
Merge branch 'master' into fix-a3c-torch
alok May 25, 2018
a62fa6e
Merge branch 'fix-a3c-torch' into trpo
alok May 25, 2018
27cd897
Revert reshape of action
alok May 25, 2018
75ea9a7
Squeeze action
alok May 29, 2018
87ab87e
Squeeze actions along first dimension
alok May 29, 2018
9acd029
try adding pytorch tests
richardliaw May 29, 2018
c4b8ca7
typo
richardliaw May 29, 2018
6a79793
fixup docker messages
richardliaw May 29, 2018
7cdedf3
Fix A3C for some envs
alok May 30, 2018
da414fc
fmt
alok May 30, 2018
3b9234f
nit flake
richardliaw May 30, 2018
9ddab77
small lint
richardliaw May 30, 2018
ca9b33c
Merge branch 'fix-a3c-torch' into trpo
alok May 30, 2018
51dc392
Use A3C's save/restore/optimizer
alok Jun 1, 2018
f3d401f
ent_coeff -> entropy_coeff
alok Jun 1, 2018
c57989d
Clean up config dicts
alok Jun 1, 2018
6c15780
fmt
alok Jun 1, 2018
cbbaf32
Use async optimizer for TRPO
alok Jun 3, 2018
78ab9d4
Use single quotes
alok Jun 3, 2018
826a2e1
Merge branch 'master' into trpo
alok Jun 3, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions python/ray/autoscaler/updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ def run(self):
self.provider.set_node_tags(self.node_id,
{TAG_RAY_NODE_STATUS: "UpdateFailed"})
if self.logfile is not None:
print("----- BEGIN REMOTE LOGS -----\n" + open(
self.logfile.name).read() + "\n----- END REMOTE LOGS -----"
)
print("----- BEGIN REMOTE LOGS -----\n" +
open(self.logfile.name).read() +
"\n----- END REMOTE LOGS -----")
raise e
self.provider.set_node_tags(
self.node_id, {
Expand Down
17 changes: 8 additions & 9 deletions python/ray/experimental/array/distributed/linalg.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,9 @@ def tsqr(a):
lower = [a.shape[1], 0]
upper = [2 * a.shape[1], core.BLOCK_SIZE]
ith_index //= 2
q_block_current = ra.dot.remote(q_block_current,
ra.subarray.remote(
q_tree[ith_index, j], lower,
upper))
q_block_current = ra.dot.remote(
q_block_current,
ra.subarray.remote(q_tree[ith_index, j], lower, upper))
q_result.objectids[i] = q_block_current
r = current_rs[0]
return q_result, ray.get(r)
Expand Down Expand Up @@ -222,10 +221,10 @@ def qr(a):
y_col_block = core.subblocks.remote(y_res, [], [i])
q = core.subtract.remote(
q,
core.dot.remote(y_col_block,
core.dot.remote(
Ts[i],
core.dot.remote(
core.transpose.remote(y_col_block), q))))
core.dot.remote(
y_col_block,
core.dot.remote(
Ts[i],
core.dot.remote(core.transpose.remote(y_col_block), q))))

return ray.get(q), r_res
8 changes: 4 additions & 4 deletions python/ray/experimental/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,8 +750,8 @@ def micros_rel(ts):
"name":
"SubmitTask",
"args": {},
"id": (parent_info["worker_id"] +
str(micros(min(parent_times))))
"id": (parent_info["worker_id"] + str(
micros(min(parent_times))))
}
full_trace.append(parent)

Expand Down Expand Up @@ -825,8 +825,8 @@ def micros_rel(ts):
"name":
"SubmitTask",
"args": {},
"id": (parent_info["worker_id"] +
str(micros(min(parent_times))))
"id": (parent_info["worker_id"] + str(
micros(min(parent_times))))
}
full_trace.append(parent)

Expand Down
12 changes: 6 additions & 6 deletions python/ray/experimental/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,8 +451,8 @@ def task_completion_time_update(abs_earliest, abs_latest, abs_num_tasks,
# Create the distribution to plot
distr = []
for task_id, data in tasks.items():
distr.append(
data["store_outputs_end"] - data["get_arguments_start"])
distr.append(data["store_outputs_end"] -
data["get_arguments_start"])

# Create a histogram from the distribution
top, bin_edges = np.histogram(distr, bins="auto")
Expand Down Expand Up @@ -520,10 +520,10 @@ def compute_utilizations(abs_earliest,
# Walk over each time bucket that this task intersects, adding the
# amount of time that the task intersects within each bucket
for bucket_idx in range(start_bucket, end_bucket + 1):
bucket_start_time = ((
earliest_time + bucket_idx) * bucket_time_length)
bucket_end_time = ((earliest_time +
(bucket_idx + 1)) * bucket_time_length)
bucket_start_time = (
(earliest_time + bucket_idx) * bucket_time_length)
bucket_end_time = (
(earliest_time + (bucket_idx + 1)) * bucket_time_length)

task_start_time_within_bucket = max(task_start_time,
bucket_start_time)
Expand Down
5 changes: 3 additions & 2 deletions python/ray/log_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@ def __init__(self, redis_ip_address, redis_port, node_ip_address):
def update_log_filenames(self):
"""Get the most up-to-date list of log files to monitor from Redis."""
num_current_log_files = len(self.log_files)
new_log_filenames = self.redis_client.lrange("LOG_FILENAMES:{}".format(
self.node_ip_address), num_current_log_files, -1)
new_log_filenames = self.redis_client.lrange(
"LOG_FILENAMES:{}".format(self.node_ip_address),
num_current_log_files, -1)
for log_filename in new_log_filenames:
print("Beginning to track file {}".format(log_filename))
assert log_filename not in self.log_files
Expand Down
11 changes: 5 additions & 6 deletions python/ray/monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,9 @@ def cleanup_object_table(self):
if manager in self.dead_plasma_managers:
# If the object was on a dead plasma manager, remove that
# location entry.
ok = self.state._execute_command(object_id,
"RAY.OBJECT_TABLE_REMOVE",
object_id.id(),
hex_to_binary(manager))
ok = self.state._execute_command(
object_id, "RAY.OBJECT_TABLE_REMOVE", object_id.id(),
hex_to_binary(manager))
if ok != b"OK":
log.warn("Failed to remove object location for dead "
"plasma manager.")
Expand Down Expand Up @@ -507,8 +506,8 @@ def run(self):
log.debug("{} dead local schedulers, {} plasma managers total, {} "
"dead plasma managers".format(
len(self.dead_local_schedulers),
(len(self.live_plasma_managers) +
len(self.dead_plasma_managers)),
(len(self.live_plasma_managers) + len(
self.dead_plasma_managers)),
len(self.dead_plasma_managers)))

# Handle messages from the subscription channels.
Expand Down
14 changes: 8 additions & 6 deletions python/ray/rllib/a3c/shared_torch_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ def compute(self, ob, *args):
with self.lock:
ob = Variable(torch.from_numpy(ob).float().unsqueeze(0))
logits, values = self._model(ob)
samples = self._model.probs(logits).multinomial().squeeze()
values = values.squeeze(0)
samples = F.softmax(logits, dim=1).multinomial(num_samples=1).squeeze()
values = values.squeeze()
return var_to_np(samples), {"vf_preds": var_to_np(values)}

def compute_logits(self, ob, *args):
Expand All @@ -48,15 +48,17 @@ def value(self, ob, *args):
ob = Variable(torch.from_numpy(ob).float().unsqueeze(0))
res = self._model.hidden_layers(ob)
res = self._model.value_branch(res)
res = res.squeeze(0)
res = res.squeeze()
return var_to_np(res)

def _evaluate(self, obs, actions):
"""Passes in multiple obs."""
logits, values = self._model(obs)
log_probs = F.log_softmax(logits)
probs = self._model.probs(logits)
log_probs = F.log_softmax(logits,dim=1)
probs = F.softmax(logits, dim=1)
action_log_probs = log_probs.gather(1, actions.view(-1, 1))
# TODO(alok): set distribution based on action space and use its
# .entropy() method to calculate automatically
entropy = -(log_probs * probs).sum(-1).sum()
return values, action_log_probs, entropy

Expand All @@ -67,7 +69,7 @@ def _backward(self, batch):
states, acs, advs, rs, _ = convert_batch(batch)
values, ac_logprobs, entropy = self._evaluate(states, acs)
pi_err = -(advs * ac_logprobs).sum()
value_err = 0.5 * (values - rs).pow(2).sum()
value_err = F.mse_loss(values, rs)

self.optimizer.zero_grad()
overall_err = (pi_err +
Expand Down
2 changes: 2 additions & 0 deletions python/ray/rllib/a3c/torchpolicy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from __future__ import division
from __future__ import print_function

from copy import deepcopy
import torch
from torch.autograd import Variable

Expand All @@ -26,6 +27,7 @@ def __init__(self, registry, ob_space, action_space, config,
self.lock = Lock()

def apply_gradients(self, grads):
grads = deepcopy(grads) # to prevent zero_grad from clearing grads since they may share memory with the policy's .grad tensors
self.optimizer.zero_grad()
for g, p in zip(grads, self._model.parameters()):
p.grad = Variable(torch.from_numpy(g))
Expand Down
7 changes: 3 additions & 4 deletions python/ray/rllib/models/pytorch/fcnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,11 @@ def _init(self, inputs, num_outputs, options):
self.hidden_layers = nn.Sequential(*layers)

self.logits = SlimFC(
last_layer_size, num_outputs,
in_size=last_layer_size, out_size=num_outputs,
initializer=normc_initializer(0.01),
activation_fn=None)
self.probs = nn.Softmax()
self.value_branch = SlimFC(
last_layer_size, 1,
in_size=last_layer_size, out_size=1,
initializer=normc_initializer(1.0),
activation_fn=None)

Expand All @@ -52,5 +51,5 @@ def forward(self, obs):
value: value function for each state"""
res = self.hidden_layers(obs)
logits = self.logits(res)
value = self.value_branch(res)
value = self.value_branch(res).reshape(-1)
return logits, value
22 changes: 8 additions & 14 deletions python/ray/rllib/models/pytorch/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,38 +5,32 @@

import numpy as np
import torch
from torch.autograd import Variable


def convert_batch(trajectory, has_features=False):
"""Convert trajectory from numpy to PT variable"""
states = Variable(torch.from_numpy(
trajectory["observations"]).float())
acs = Variable(torch.from_numpy(
trajectory["actions"]))
advs = Variable(torch.from_numpy(
trajectory["advantages"].copy()).float())
advs = advs.view(-1, 1)
rs = Variable(torch.from_numpy(
trajectory["value_targets"]).float())
rs = rs.view(-1, 1)
states = torch.from_numpy(trajectory["obs"]).float()
acs = torch.from_numpy(trajectory["actions"])
advs = torch.from_numpy(trajectory["advantages"].copy()).float().view(
-1, 1)
rs = torch.from_numpy(trajectory["rewards"]).float().view(-1)
if has_features:
features = [Variable(torch.from_numpy(f))
for f in trajectory["features"]]
features = [torch.from_numpy(f) for f in trajectory["features"]]
else:
features = trajectory["features"]
return states, acs, advs, rs, features


def var_to_np(var):
return var.data.numpy()[0]
return var.data.numpy()


def normc_initializer(std=1.0):
def initializer(tensor):
tensor.data.normal_(0, 1)
tensor.data *= std / torch.sqrt(
tensor.data.pow(2).sum(1, keepdim=True))

return initializer


Expand Down
6 changes: 3 additions & 3 deletions python/ray/rllib/models/pytorch/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,14 @@ def forward(self, x):
class SlimFC(nn.Module):
"""Simple PyTorch of `linear` function"""

def __init__(self, in_size, size, initializer=None,
def __init__(self, in_size, out_size, initializer=None,
activation_fn=None, bias_init=0):
super(SlimFC, self).__init__()
layers = []
linear = nn.Linear(in_size, size)
linear = nn.Linear(in_size, out_size)
if initializer:
initializer(linear.weight)
nn.init.constant(linear.bias, bias_init)
nn.init.constant_(linear.bias, bias_init)
layers.append(linear)
if activation_fn:
layers.append(activation_fn())
Expand Down
1 change: 0 additions & 1 deletion python/ray/rllib/models/pytorch/visionnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ def _init(self, inputs, num_outputs, options):

self.logits = SlimFC(
out_channels, num_outputs, initializer=nn.init.xavier_uniform)
self.probs = nn.Softmax()
self.value_branch = SlimFC(
out_channels, 1, initializer=normc_initializer())

Expand Down
36 changes: 27 additions & 9 deletions python/ray/rllib/optimizers/sample_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ def __init__(self, *args, **kwargs):
@staticmethod
def concat_samples(samples):
out = {}
for k in samples[0].data.keys():
out[k] = np.concatenate([s.data[k] for s in samples])
for k in samples[0].keys():
out[k] = np.concatenate([s[k] for s in samples])
return SampleBatch(out)

def concat(self, other):
Expand All @@ -50,10 +50,10 @@ def concat(self, other):
{"a": [1, 2, 3, 4, 5]}
"""

assert self.data.keys() == other.data.keys(), "must have same columns"
assert self.keys() == other.keys(), "must have same columns"
out = {}
for k in self.data.keys():
out[k] = np.concatenate([self.data[k], other.data[k]])
for k in self.keys():
out[k] = np.concatenate([self[k], other[k]])
return SampleBatch(out)

def rows(self):
Expand All @@ -70,7 +70,7 @@ def rows(self):

for i in range(self.count):
row = {}
for k in self.data.keys():
for k in self.keys():
row[k] = self[k][i]
yield row

Expand All @@ -85,19 +85,37 @@ def columns(self, keys):

out = []
for k in keys:
out.append(self.data[k])
out.append(self[k])
return out

def shuffle(self):
permutation = np.random.permutation(self.count)
for key, val in self.data.items():
self.data[key] = val[permutation]
for key, val in self.items():
self[key] = val[permutation]

def __getitem__(self, key):
return self.data[key]

def __setitem__(self, key, item):
self.data[key] = item

def __str__(self):
return "SampleBatch({})".format(str(self.data))

def __repr__(self):
return "SampleBatch({})".format(str(self.data))

def keys(self):
return self.data.keys()

def items(self):
return self.data.items()

def __iter__(self):
return self.data.__iter__()

def __next__(self):
return self.data.__next__()

def __contains__(self, x):
return x in self.data
8 changes: 4 additions & 4 deletions python/ray/rllib/test/test_optimizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ def testConcat(self):
b2 = SampleBatch({"a": np.array([1]), "b": np.array([4])})
b3 = SampleBatch({"a": np.array([1]), "b": np.array([5])})
b12 = b1.concat(b2)
self.assertEqual(b12.data["a"].tolist(), [1, 2, 3, 1])
self.assertEqual(b12.data["b"].tolist(), [4, 5, 6, 4])
self.assertEqual(b12["a"].tolist(), [1, 2, 3, 1])
self.assertEqual(b12["b"].tolist(), [4, 5, 6, 4])
b = SampleBatch.concat_samples([b1, b2, b3])
self.assertEqual(b.data["a"].tolist(), [1, 2, 3, 1, 1])
self.assertEqual(b.data["b"].tolist(), [4, 5, 6, 4, 5])
self.assertEqual(b["a"].tolist(), [1, 2, 3, 1, 1])
self.assertEqual(b["b"].tolist(), [4, 5, 6, 4, 5])


if __name__ == '__main__':
Expand Down
6 changes: 6 additions & 0 deletions python/ray/rllib/trpo/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from ray.rllib.trpo.trpo import DEFAULT_CONFIG, TRPOAgent

__all__ = [
'TRPOAgent',
'DEFAULT_CONFIG',
]
Loading