Trains a network using input data x and output data y, and learning rate eta. The learning rate is normalized with the size of the data batch.
| Type | Intent | Optional | Attributes | Name |
|------|--------|----------|------------|------|
| class(network_type) | intent(inout) | | | self |
| real(kind=rk) | intent(in) | | | x(:,:) |
| real(kind=rk) | intent(in) | | | y(:,:) |
| real(kind=rk) | intent(in) | | | eta |
subroutine train_batch(self, x, y, eta)
  !! Trains a network using input data x and output data y,
  !! and learning rate eta. The learning rate is normalized
  !! with the size of the data batch.
  class(network_type), intent(in out) :: self
  real(rk), intent(in) :: x(:,:), y(:,:), eta
  type(array1d), allocatable :: db(:), db_batch(:)
  type(array2d), allocatable :: dw(:), dw_batch(:)
  integer(ik) :: i, n
  integer(ik) :: im !! mini-batch size (number of samples, columns of x)
  integer(ik) :: nm !! number of layers
  integer(ik) :: is, ie, indices(2)

  im = size(x, dim=2)
  nm = size(self % dims)

  ! Each coarray image processes its own contiguous slice [is, ie] of
  ! the mini-batch; partial gradients are reduced across images below.
  indices = tile_indices(im)
  is = indices(1)
  ie = indices(2)

  ! Allocate and zero per-layer gradient accumulators.
  call db_init(db_batch, self % dims)
  call dw_init(dw_batch, self % dims)

  ! NOTE(fix): this sample loop must be an ordinary sequential do, not
  ! do concurrent. fwdprop/backprop mutate self (stored activations and
  ! gradient state), so they are impure — the standard forbids referencing
  ! impure procedures inside do concurrent — and the accumulation into
  ! dw_batch/db_batch is a cross-iteration dependency that would race.
  do i = is, ie
    call self % fwdprop(x(:,i))
    call self % backprop(y(:,i), dw, db)
    ! Per-layer accumulation: iterations over n touch disjoint arrays,
    ! so do concurrent is safe here.
    do concurrent(n = 1:nm)
      dw_batch(n) % array = dw_batch(n) % array + dw(n) % array
      db_batch(n) % array = db_batch(n) % array + db(n) % array
    end do
  end do

  ! Sum partial gradients across all images when running in parallel.
  if (num_images() > 1) then
    call dw_co_sum(dw_batch)
    call db_co_sum(db_batch)
  end if

  ! Apply the update with the learning rate normalized by batch size.
  call self % update(dw_batch, db_batch, eta / im)

end subroutine train_batch