-- ABSOptimization.lua (forked from clementfarabet/lua---nnx)
local ABS,parent = torch.class('nn.ABSOptimization', 'nn.SGDOptimization')
-- ABS is Adaptive Batch Size: plain SGD whose reduce hook doubles the
-- batch size whenever the partial gradients are too noisy relative to
-- their mean.

function ABS:__init(...)
   parent.__init(self, ...)
   xlua.unpack_class(self, {...},
      'ABSOptimization', nil,
      {arg='theta', type='number',
       help='threshold for increasing batch size', default=1}
   )
end
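
-- A minimal construction sketch (hedged: 'mlp' is a stand-in for any nn
-- module, and the argument names follow the usual nnx optimizer
-- conventions rather than anything defined in this file):
--
--   local optimizer = nn.ABSOptimization{
--      module = mlp,
--      criterion = nn.MSECriterion(),
--      learningRate = 1e-2,
--      theta = 1   -- noise threshold used by reduce_hook below
--   }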

function ABS:reduce_hook()
   -- The standard reduce is to sum the gradients: accumulate the partial
   -- gradients and average them. P, gradParametersPartial, outputsPartial
   -- and inputs are set up by the batch-optimization machinery that calls
   -- this hook.
   self.gradParameters:zero()
   -- compute the mean gradient from the partial batches
   for t = 1,P do
      self.gradParameters:add(gradParametersPartial[t])
   end
   self.gradParameters:div(#inputs)
   self.gradStd = torch.Tensor():resizeAs(self.gradParameters):zero()
   local tmp = torch.Tensor():resizeAs(self.gradParameters):zero()
   -- estimate the gradient spread, element-wise:
   -- 1) sum the squared deviations (gradParametersPartial[t] - mean)^2
   for t = 1,P do
      tmp:add(self.gradParameters):mul(-1):add(gradParametersPartial[t]):pow(2)
      self.gradStd:add(tmp)
      tmp:zero()
   end
   -- 2) take the sqrt (note: the sum is not divided by P, so this is the
   --    root of the summed squared deviations rather than a normalized std)
   self.gradStd:sqrt()
   -- adaptive batch size test: if the noise estimate dominates the mean
   -- gradient (scaled by theta), the gradient estimate is unreliable, so
   -- double the batch size for subsequent iterations
   if self.theta * self.gradStd:norm() > self.gradParameters:norm() then
      self.batchSize = self.batchSize * 2
   end
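   -- (Averaging over B samples shrinks gradient noise roughly as 1/sqrt(B),
   -- so each doubling cuts the noise by about sqrt(2); since the test runs
   -- on every reduce, the batch keeps growing until the mean gradient
   -- dominates the noise.)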
   -- return average f(X) over the whole batch
   self.output = 0
   for t = 1,P do
      self.output = self.output + outputsPartial[t]
   end
   self.output = self.output / #inputs
end
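
--[[ A toy illustration of the doubling test above (standalone sketch, not
part of the class; the tensors merely stand in for the quantities computed
in reduce_hook, and the only dependency assumed is torch):

   local theta = 1
   local mean  = torch.Tensor{0.1, -0.2}   -- stands in for gradParameters
   local std   = torch.Tensor{0.5,  0.4}   -- stands in for gradStd
   -- ||std|| ~ 0.64 > ||mean|| ~ 0.22, so a real run would double batchSize
   if theta * std:norm() > mean:norm() then
      print('gradient too noisy: doubling the batch size')
   end
--]]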