Use validation memory when computing validation metrics in Agent:learn

Fixes #16
Kaixhin · May 7, 2016 · a9dc1f2 · a9dc1f2
1 parent bc71583
commit a9dc1f2
Showing 1 changed file with 9 additions and 2 deletions.
diff --git a/Agent.lua b/Agent.lua
@@ -251,7 +251,7 @@ function Agent:observe(reward, rawObservation, terminal)
 end
 
 -- Learns from experience
-function Agent:learn(x, indices, ISWeights)
+function Agent:learn(x, indices, ISWeights, isValidation)
   -- Copy x to parameters θ if necessary
   if x ~= self.theta then
     self.theta:copy(x)
@@ -260,7 +260,8 @@ function Agent:learn(x, indices, ISWeights)
   self.dTheta:zero()
 
   -- Retrieve experience tuples
-  local states, actions, rewards, transitions, terminals = self.memory:retrieve(indices) -- Terminal status is for transition (can't act in terminal state)
+  local memory = isValidation and self.valMemory or self.memory
+  local states, actions, rewards, transitions, terminals = memory:retrieve(indices) -- Terminal status is for transition (can't act in terminal state)
   local N = actions:size(1)
 
   -- Perform argmax action selection
@@ -337,6 +338,12 @@ function Agent:learn(x, indices, ISWeights)
     -- Squared loss
     loss = torch.mean(self.tdErr:clone():pow(2):mul(0.5)) -- Average over heads
   end
+
+  -- Exit if being used for validation metrics
+  if isValidation then
+    return
+  end
+
   -- Send TD-errors δ to be used as priorities
   self.memory:updatePriorities(indices, torch.mean(self.tdErr, 2)) -- Use average error over heads