Skip to content

Commit

Permalink
Updates optimizer and target update interval
Browse files (browse the repository at this point in the history)
  • Loading branch information
prabhatnagarajan committed Oct 28, 2020
1 parent 4e15a76 commit 71809a5
Showing 1 changed file with 3 additions and 11 deletions.
14 changes: 3 additions & 11 deletions examples/her/train_dqn_bit_flip.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -172,15 +172,7 @@ def make_env(test):
DiscreteActionValueHead(),
)

- # Use the same hyperparameters as the Nature paper
- opt = pfrl.optimizers.RMSpropEpsInsideSqrt(
- q_func.parameters(),
- lr=2.5e-4,
- alpha=0.95,
- momentum=0.0,
- eps=1e-2,
- centered=True,
- )
+ opt = torch.optim.Adam(q_func.parameters(), eps=1e-3)

if args.use_hindsight:
rbuf = replay_buffers.hindsight.HindsightReplayBuffer(
Expand All @@ -192,7 +184,7 @@ def make_env(test):
rbuf = replay_buffers.ReplayBuffer(10 ** 6)

explorer = explorers.LinearDecayEpsilonGreedy(
- start_epsilon=1.0,
+ start_epsilon=0.3,
end_epsilon=0.0,
decay_steps=5 * 10 ** 3,
random_action_func=lambda: np.random.randint(n_actions),
Expand All @@ -213,7 +205,7 @@ def phi(observation):
gamma=0.99,
explorer=explorer,
replay_start_size=args.replay_start_size,
- target_update_interval=10 ** 4,
+ target_update_interval=10 ** 3,
clip_delta=True,
update_interval=4,
batch_accumulator="sum",
Expand Down

0 comments on commit 71809a5

Please sign in to comment.