Merge entropy branch
Kaixhin committed Jul 21, 2016
2 parents bff703a + 3025a2a · commit d2ebc06
Showing 1 changed file with 8 additions and 4 deletions.
async/A3CAgent.lua: 8 additions & 4 deletions
@@ -88,13 +88,17 @@ function A3CAgent:accumulateGradients(terminal, state)
     probability:add(TINY_EPSILON) -- could contain 0 -> log(0)= -inf -> theta = nans
 
     self.vTarget[1] = -0.5 * (R - V)
 
     -- ∇θ logp(s) = 1/p(a) for chosen a, 0 otherwise
     self.policyTarget:zero()
-    local logProbability = torch.log(probability)
-    -- Add (negative of) gradient of entropy of policy to target to improve exploration (prevent convergence to suboptimal deterministic policy)
-    self.policyTarget[action] = -(R - V) / probability[action] - self.beta * logProbability:sum()
+    -- f(s) ∇θ logp(s)
+    self.policyTarget[action] = -(R - V) / probability[action] -- Negative target for gradient descent
+
+    -- Calculate (negative of) gradient of entropy of policy (for gradient descent): -(-logp(s) - 1)
+    local gradEntropy = torch.log(probability) + 1
+    -- Add to target to improve exploration (prevent convergence to suboptimal deterministic policy)
+    self.policyTarget:add(self.beta, gradEntropy)
 
     self.policyNet_:backward(self.states[i], self.targets)
   end
 end
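
For context on the change (not part of the commit): the policy target line sets -(R - V)/p(a), the negated advantage times ∇ log p(a) = 1/p(a), as the descent target for the chosen action. The new gradEntropy term is the gradient of the negative policy entropy: with H(π) = -Σ p(a) log p(a), differentiating -H with respect to p(a) gives log p(a) + 1, which is what torch.log(probability) + 1 computes; scaled by beta and added to the target, it pushes the policy towards higher entropy. A minimal Torch sketch that checks this analytic gradient against a finite difference (negEntropy and the example probabilities are illustrative, not from the repository):

  require 'torch'

  -- Example action probabilities (illustrative values)
  local probability = torch.Tensor{0.7, 0.2, 0.1}

  -- Analytic gradient of the negative entropy: d/dp(a) [Σ p(a) log p(a)] = log p(a) + 1
  local gradEntropy = torch.log(probability) + 1

  -- Negative entropy of a probability vector: Σ p(a) log p(a)
  local function negEntropy(p)
    return torch.cmul(p, torch.log(p)):sum()
  end

  -- Finite-difference check of the gradient for the first action
  local eps = 1e-6
  local pPlus = probability:clone(); pPlus[1] = pPlus[1] + eps
  local pMinus = probability:clone(); pMinus[1] = pMinus[1] - eps
  local numericalGrad = (negEntropy(pPlus) - negEntropy(pMinus)) / (2 * eps)

  print(gradEntropy[1], numericalGrad) -- both ≈ log(0.7) + 1 ≈ 0.643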
