From 1fb279aff00bc128f93d7ea6bef89c8499a0fc07 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 2 Feb 2025 13:36:04 +0400 Subject: [PATCH 01/44] linear2d_layer forward implementation --- src/nf.f90 | 1 + src/nf/nf_linear2d_layer.f90 | 97 ++++++++++++++++++++++++++++++++++++ test/test_linear2d_layer.f90 | 40 +++++++++++++++ 3 files changed, 138 insertions(+) create mode 100644 src/nf/nf_linear2d_layer.f90 create mode 100644 test/test_linear2d_layer.f90 diff --git a/src/nf.f90 b/src/nf.f90 index b97d9e62..5a5fb3c3 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -12,4 +12,5 @@ module nf gaussian, linear, relu, leaky_relu, & sigmoid, softmax, softplus, step, tanhf, & celu + use nf_linear2d_layer, only: linear2d_layer end module nf diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 new file mode 100644 index 00000000..4bfbed30 --- /dev/null +++ b/src/nf/nf_linear2d_layer.f90 @@ -0,0 +1,97 @@ +module nf_linear2d_layer + + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + + implicit none + + private + public :: linear2d_layer + + type, extends(base_layer) :: linear2d_layer + integer :: batch_size, sequence_length, in_features, out_features + + real, allocatable :: weights(:, :) + real, allocatable :: biases(:) + real, allocatable :: output(:, :, :) + real, allocatable :: gradient(:, :, :) ! input gradient + real, allocatable :: dw(:, :) ! weight gradients + real, allocatable :: db(:) ! bias gradients + + contains + +! procedure :: backward + procedure :: forward + procedure :: init + + end type linear2d_layer + + interface linear2d_layer + module function linear2d_layer_cons(in_features, out_features) & + result(res) + integer, intent(in) :: in_features, out_features + type(linear2d_layer) :: res + end function linear2d_layer_cons + end interface linear2d_layer + + interface + pure module subroutine forward(self, input) + class(linear2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :, :) + end subroutine forward + + module subroutine init(self, input_shape) + class(linear2d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + end subroutine init + end interface + +contains + module function linear2d_layer_cons(& + batch_size, sequence_length, in_features, out_features& + ) result(res) + integer, intent(in) :: batch_size, sequence_length, in_features, out_features + type(linear2d_layer) :: res + + res % in_features = in_features + res % out_features = out_features + res % sequence_length = sequence_length + res % batch_size = batch_size + + call res % init([1]) + end function linear2d_layer_cons + + module subroutine init(self, input_shape) + class(linear2d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + integer i, j + + allocate(self % output(self % batch_size, self % sequence_length, self % out_features)) + allocate(self % gradient(self % batch_size, self % sequence_length, self % in_features)) + + allocate(self%weights(self%in_features, self%out_features)) + do i = 1, self%in_features + do j = 1, self%out_features + self%weights(i, j) = 0.1 + end do + end do + + allocate(self%biases(self%out_features)) + do i = 1, self%out_features + self%biases(i) = 0.11 + end do + end subroutine init + + pure module subroutine forward(self, input) + class(linear2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :, :) + integer :: i, j + + do i = 1, self % batch_size + self % output(i, :, :) = matmul(input(i, :, :), self % weights) + do j = 1, self % sequence_length + self % 
output(i, j, :) = self % output(i, j, :) + self % biases + end do + end do + end subroutine forward +end module nf_linear2d_layer diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 new file mode 100644 index 00000000..6f14fc8c --- /dev/null +++ b/test/test_linear2d_layer.f90 @@ -0,0 +1,40 @@ +program test_linear2d_layer + use iso_fortran_env, only: stderr => error_unit + use nf_linear2d_layer, only: linear2d_layer + implicit none + + logical :: ok = .true. + real :: sample_input(2, 3, 4) = reshape(& + [0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2,& + 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2],& + [2, 3, 4]) ! first batch are 0.1, second 0.2 + type(linear2d_layer) :: linear + + linear = linear2d_layer(batch_size=2, sequence_length=3, in_features=4, out_features=1) + + call test_linear2d_layer_forward(linear, ok, sample_input) + +contains + subroutine test_linear2d_layer_forward(linear, ok, input) + type(linear2d_layer), intent(in out) :: linear + logical, intent(in out) :: ok + real, intent(in) :: input(2, 3, 4) + real :: output_shape(3) + real :: output_flat(6) + real :: expected_shape(3) = [2, 3, 1] + real :: expected_output_flat(6) = [0.15, 0.19, 0.15, 0.19, 0.15, 0.19] + + call linear % forward(input) + + output_shape = shape(linear % output) + if (.not. all(output_shape.eq.expected_shape)) then + ok = .false. + write(stderr, '(a)') 'forward returned incorrect shape.. failed' + end if + output_flat = reshape(linear % output, shape(output_flat)) + if (.not. all(output_flat.eq.expected_output_flat)) then + ok = .false. + write(stderr, '(a)') 'forward returned incorrect values.. failed' + end if + end subroutine test_linear2d_layer_forward +end program test_linear2d_layer \ No newline at end of file From d997b6bc602ce3cdc4eee540571cff6994abe7e1 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 2 Feb 2025 15:29:06 +0400 Subject: [PATCH 02/44] implement backward --- src/nf/nf_linear2d_layer.f90 | 23 ++++++++++++++- test/test_linear2d_layer.f90 | 56 ++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index 4bfbed30..833ee17e 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -20,7 +20,7 @@ module nf_linear2d_layer contains -! 
procedure :: backward + procedure :: backward procedure :: forward procedure :: init @@ -80,6 +80,11 @@ module subroutine init(self, input_shape) do i = 1, self%out_features self%biases(i) = 0.11 end do + + allocate(self % dw(self % in_features, self % out_features)) + self % dw = 0.0 + allocate(self % db(self % out_features)) + self % db = 0.0 end subroutine init pure module subroutine forward(self, input) @@ -94,4 +99,20 @@ pure module subroutine forward(self, input) end do end do end subroutine forward + + pure module subroutine backward(self, input, gradient) + class(linear2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :, :) + real, intent(in) :: gradient(:, :, :) + real :: db(self % out_features) + real :: dw(self % in_features, self % out_features) + integer :: i + + do i = 1, self % batch_size + self % dw = self % dw + matmul(transpose(input(i, :, :)), gradient(i, :, :)) + self % db = self % db + sum(gradient(i, :, :), 1) + self % gradient(i, :, :) = matmul(gradient(i, :, :), transpose(self % weights)) + end do + + end subroutine backward end module nf_linear2d_layer diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index 6f14fc8c..e193d704 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -8,11 +8,13 @@ program test_linear2d_layer [0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2,& 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2],& [2, 3, 4]) ! first batch are 0.1, second 0.2 + real :: sample_gradient(2, 3, 1) = reshape([2., 2., 2., 2., 2., 2.], [2, 3, 1]) type(linear2d_layer) :: linear linear = linear2d_layer(batch_size=2, sequence_length=3, in_features=4, out_features=1) call test_linear2d_layer_forward(linear, ok, sample_input) + call test_linear2d_layer_backward(linear, ok, sample_input, sample_gradient) contains subroutine test_linear2d_layer_forward(linear, ok, input) @@ -37,4 +39,58 @@ subroutine test_linear2d_layer_forward(linear, ok, input) write(stderr, '(a)') 'forward returned incorrect values.. failed' end if end subroutine test_linear2d_layer_forward + + subroutine test_linear2d_layer_backward(linear, ok, input, gradient) + type(linear2d_layer), intent(in out) :: linear + logical, intent(in out) :: ok + real, intent(in) :: input(2, 3, 4) + real, intent(in) :: gradient(2, 3, 1) + real :: gradient_shape(3) + real :: dw_shape(2) + real :: db_shape(1) + real :: gradient_flat(24) + real :: dw_flat(4) + real :: expected_gradient_shape(3) = [2, 3, 4] + real :: expected_dw_shape(2) = [4, 1] + real :: expected_db_shape(1) = [1] + real :: expected_gradient_flat(24) + real :: expected_dw_flat(4) + real :: expected_db(1) = [12.0] + + expected_gradient_flat = 0.200000003 + expected_dw_flat = 1.80000007 + + call linear % backward(input, gradient) + + gradient_shape = shape(linear % gradient) + if (.not. all(gradient_shape.eq.expected_gradient_shape)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect gradient shape.. failed' + end if + dw_shape = shape(linear % dw) + if (.not. all(dw_shape.eq.expected_dw_shape)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect dw shape.. failed' + end if + db_shape = shape(linear % db) + if (.not. all(db_shape.eq.expected_db_shape)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect db shape.. failed' + end if + + gradient_flat = reshape(linear % gradient, shape(gradient_flat)) + if (.not. all(gradient_flat.eq.expected_gradient_flat)) then + ok = .false. 
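! The expected values above follow from the usual backward rule for a linear map
! y = matmul(x, W) + b, applied per batch slice x_i (3x4) with gradient slice g_i (3x1)
! and W (4x1) initialized to 0.1:
!   input gradient: matmul(g_i, transpose(W)) = 2.0 * 0.1 = 0.2 everywhere
!   dw: sum over batches of matmul(transpose(x_i), g_i) = 0.1*2*3 + 0.2*2*3 = 1.8
!   db: sum of all gradient entries = 2.0 * 6 = 12.0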
+ write(stderr, '(a)') 'backward returned incorrect gradient values.. failed' + end if + dw_flat = reshape(linear % dw, shape(dw_flat)) + if (.not. all(dw_flat.eq.expected_dw_flat)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect dw values.. failed' + end if + if (.not. all(linear % db.eq.expected_db)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect db values.. failed' + end if + end subroutine test_linear2d_layer_backward end program test_linear2d_layer \ No newline at end of file From 9919c01f0f85c0d1839f48c4d5b47f31d943f88c Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 2 Feb 2025 15:36:21 +0400 Subject: [PATCH 03/44] introduce concurrency, outtroduce stupidity --- src/nf/nf_linear2d_layer.f90 | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index 833ee17e..e2bd4467 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -64,22 +64,15 @@ end function linear2d_layer_cons module subroutine init(self, input_shape) class(linear2d_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) - integer i, j allocate(self % output(self % batch_size, self % sequence_length, self % out_features)) allocate(self % gradient(self % batch_size, self % sequence_length, self % in_features)) allocate(self%weights(self%in_features, self%out_features)) - do i = 1, self%in_features - do j = 1, self%out_features - self%weights(i, j) = 0.1 - end do - end do + self % weights = 0.1 allocate(self%biases(self%out_features)) - do i = 1, self%out_features - self%biases(i) = 0.11 - end do + self%biases = 0.11 allocate(self % dw(self % in_features, self % out_features)) self % dw = 0.0 @@ -92,11 +85,11 @@ pure module subroutine forward(self, input) real, intent(in) :: input(:, :, :) integer :: i, j - do i = 1, self % batch_size + do concurrent(i = 1: self % batch_size) self % output(i, :, :) = matmul(input(i, :, :), self % weights) - do j = 1, self % sequence_length - self % output(i, j, :) = self % output(i, j, :) + self % biases - end do + end do + do concurrent(i = 1: self % batch_size, j = 1: self % sequence_length) + self % output(i, j, :) = self % output(i, j, :) + self % biases end do end subroutine forward @@ -108,7 +101,7 @@ pure module subroutine backward(self, input, gradient) real :: dw(self % in_features, self % out_features) integer :: i - do i = 1, self % batch_size + do concurrent(i = 1: self % batch_size) self % dw = self % dw + matmul(transpose(input(i, :, :)), gradient(i, :, :)) self % db = self % db + sum(gradient(i, :, :), 1) self % gradient(i, :, :) = matmul(gradient(i, :, :), transpose(self % weights)) From 43d1a1f7933679bba96a63b649f3f81e0dd91011 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 2 Feb 2025 15:37:26 +0400 Subject: [PATCH 04/44] fix style --- src/nf/nf_linear2d_layer.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index e2bd4467..93d52ebc 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -68,10 +68,10 @@ module subroutine init(self, input_shape) allocate(self % output(self % batch_size, self % sequence_length, self % out_features)) allocate(self % gradient(self % batch_size, self % sequence_length, self % in_features)) - allocate(self%weights(self%in_features, self%out_features)) + allocate(self % weights(self % in_features, self % out_features)) self % weights = 0.1 - 
allocate(self%biases(self%out_features)) + allocate(self % biases(self % out_features)) self%biases = 0.11 allocate(self % dw(self % in_features, self % out_features)) From 906f21b43b7dd9ef312a38fc38f67b1d81e99196 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 3 Feb 2025 21:57:16 +0400 Subject: [PATCH 05/44] add parameters api to linear2d_layer --- src/nf/nf_linear2d_layer.f90 | 71 +++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index 93d52ebc..4b1204a4 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -23,6 +23,10 @@ module nf_linear2d_layer procedure :: backward procedure :: forward procedure :: init + procedure :: get_num_params + procedure :: get_params + procedure :: get_gradients + procedure :: set_params end type linear2d_layer @@ -58,7 +62,7 @@ module function linear2d_layer_cons(& res % sequence_length = sequence_length res % batch_size = batch_size - call res % init([1]) +! call res % init([1]) end function linear2d_layer_cons module subroutine init(self, input_shape) @@ -106,6 +110,69 @@ pure module subroutine backward(self, input, gradient) self % db = self % db + sum(gradient(i, :, :), 1) self % gradient(i, :, :) = matmul(gradient(i, :, :), transpose(self % weights)) end do - end subroutine backward + + pure module function get_num_params(self) result(num_params) + class(linear2d_layer), intent(in) :: self + integer :: num_params + + ! Number of weigths times number of biases + num_params = self % in_features * self % out_features + self % out_features + + end function get_num_params + + + module function get_params(self) result(params) + class(linear2d_layer), intent(in), target :: self + real, allocatable :: params(:) + + real, pointer :: w_(:) => null() + + w_(1:size(self % weights)) => self % weights + + params = [ & + w_, & + self % biases & + ] + + end function get_params + + + module function get_gradients(self) result(gradients) + class(linear2d_layer), intent(in), target :: self + real, allocatable :: gradients(:) + + real, pointer :: dw_(:) => null() + + dw_(1:size(self % dw)) => self % dw + + gradients = [ & + dw_, & + self % db & + ] + + end function get_gradients + + + module subroutine set_params(self, params) + class(linear2d_layer), intent(in out) :: self + real, intent(in), target :: params(:) + + real, pointer :: p_(:,:) => null() + + ! check if the number of parameters is correct + if (size(params) /= self % get_num_params()) then + error stop 'Error: number of parameters does not match' + end if + + associate(n => self % in_features * self % out_features) + ! reshape the weights + p_(1:self % in_features, 1:self % out_features) => params(1 : n) + self % weights = p_ + + ! 
reshape the biases + self % biases = params(n + 1 : n + self % out_features) + end associate + + end subroutine set_params end module nf_linear2d_layer From e1b46955df8e0db2791fbbf08ab594f8d955e4d4 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 3 Feb 2025 21:57:55 +0400 Subject: [PATCH 06/44] add constructor for linear2d_layer --- src/nf/nf_layer_constructors.f90 | 7 ++++++- src/nf/nf_layer_constructors_submodule.f90 | 10 ++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index ea1c08df..bc22ed9d 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -8,7 +8,7 @@ module nf_layer_constructors implicit none private - public :: conv2d, dense, flatten, input, maxpool2d, reshape + public :: conv2d, dense, flatten, input, maxpool2d, reshape, linear2d interface input @@ -185,6 +185,11 @@ module function reshape(output_shape) result(res) !! Resulting layer instance end function reshape + module function linear2d(batch_size, sequence_length, in_features, out_features) result(res) + integer, intent(in) :: batch_size, sequence_length, in_features, out_features + type(layer) :: res + end function linear2d + end interface end module nf_layer_constructors diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 4c5994ee..7eebf50c 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -9,6 +9,7 @@ use nf_input3d_layer, only: input3d_layer use nf_maxpool2d_layer, only: maxpool2d_layer use nf_reshape_layer, only: reshape3d_layer + use nf_linear2d_layer, only: linear2d_layer use nf_activation, only: activation_function, relu, sigmoid implicit none @@ -148,4 +149,13 @@ module function reshape(output_shape) result(res) end function reshape + module function linear2d(batch_size, sequence_length, in_features, out_features) result(res) + integer, intent(in) :: batch_size, sequence_length, in_features, out_features + type(layer) :: res + + res % name = 'linear2d' + res % layer_shape = [batch_size, sequence_length, out_features] + allocate(res % p, source=linear2d_layer(batch_size, sequence_length, in_features, out_features)) + end function linear2d + end submodule nf_layer_constructors_submodule From 0fe2ef0801c3d0e4336a67c84e20f97d6d3667f3 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 3 Feb 2025 21:58:32 +0400 Subject: [PATCH 07/44] add integration for linear2d layer --- src/nf/nf_layer_submodule.f90 | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index ab8d5b5d..4c176131 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -9,6 +9,7 @@ use nf_input3d_layer, only: input3d_layer use nf_maxpool2d_layer, only: maxpool2d_layer use nf_reshape_layer, only: reshape3d_layer + use nf_linear2d_layer, only: linear2d_layer use nf_optimizers, only: optimizer_base_type contains @@ -47,6 +48,8 @@ pure module subroutine backward_1d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) type is(maxpool2d_layer) call this_layer % backward(prev_layer % output, gradient) + type is(linear2d_layer) + call this_layer % backward(prev_layer % output, gradient) end select end select @@ -116,6 +119,16 @@ pure module subroutine backward_3d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) end select + type 
is(linear2d_layer) + select type(prev_layer => previous % p) + type is(input3d_layer) + call this_layer % backward(prev_layer % output, gradient) +! type is(dense_layer) +! call this_layer % forward(prev_layer % output) +! type is(flatten_layer) +! call this_layer % backward(prev_layer % output, gradient) + end select + end select end subroutine backward_3d @@ -182,6 +195,8 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) type is(reshape3d_layer) call this_layer % forward(prev_layer % output) + type is(linear2d_layer) + call this_layer % forward(prev_layer % output) end select type is(reshape3d_layer) @@ -196,6 +211,14 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) end select + type is(linear2d_layer) + select type(prev_layer => input % p) + type is(input3d_layer) + call this_layer % forward(prev_layer % output) + type is(linear2d_layer) + call this_layer % forward(prev_layer % output) + end select + end select end subroutine forward @@ -328,6 +351,8 @@ elemental module function get_num_params(self) result(num_params) num_params = 0 type is (reshape3d_layer) num_params = 0 + type is (linear2d_layer) + num_params = this_layer % get_num_params() class default error stop 'Unknown layer type.' end select @@ -355,6 +380,8 @@ module function get_params(self) result(params) ! No parameters to get. type is (reshape3d_layer) ! No parameters to get. + type is (linear2d_layer) + params = this_layer % get_params() class default error stop 'Unknown layer type.' end select @@ -382,6 +409,8 @@ module function get_gradients(self) result(gradients) ! No gradients to get. type is (reshape3d_layer) ! No gradients to get. + type is (linear2d_layer) + gradients = this_layer % get_gradients() class default error stop 'Unknown layer type.' end select @@ -446,6 +475,9 @@ module subroutine set_params(self, params) class default error stop 'Unknown layer type.' + + type is (linear2d_layer) + call this_layer % set_params(params) end select end subroutine set_params From 957095dd6d62252ea1f7066cf29115c27f08be6f Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 3 Feb 2025 21:58:58 +0400 Subject: [PATCH 08/44] set usage rules for linear2d_layer --- src/nf/nf_network_submodule.f90 | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index e90d92d9..57244046 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -8,6 +8,7 @@ use nf_input3d_layer, only: input3d_layer use nf_maxpool2d_layer, only: maxpool2d_layer use nf_reshape_layer, only: reshape3d_layer + use nf_linear2d_layer, only: linear2d_layer use nf_layer, only: layer use nf_layer_constructors, only: conv2d, dense, flatten, input, maxpool2d, reshape use nf_loss, only: quadratic @@ -129,6 +130,11 @@ module subroutine backward(self, output, loss) self % layers(n - 1), & self % loss % derivative(output, this_layer % output) & ) + type is(flatten_layer) + call self % layers(n) % backward( & + self % layers(n - 1), & + self % loss % derivative(output, this_layer % output) & + ) end select else ! 
Hidden layer; take the gradient from the next layer @@ -151,6 +157,8 @@ module subroutine backward(self, output, loss) type is(reshape3d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) + type is(linear2d_layer) + call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) end select end if From eff36fe2b596e828ae1bf425a52bd3c484e7c736 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 3 Feb 2025 21:59:20 +0400 Subject: [PATCH 09/44] add linear2d_layer to public api --- src/nf.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nf.f90 b/src/nf.f90 index 5a5fb3c3..4351e201 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -3,7 +3,7 @@ module nf use nf_datasets_mnist, only: label_digits, load_mnist use nf_layer, only: layer use nf_layer_constructors, only: & - conv2d, dense, flatten, input, maxpool2d, reshape + conv2d, dense, flatten, input, maxpool2d, reshape, linear2d use nf_loss, only: mse, quadratic use nf_metrics, only: corr, maxabs use nf_network, only: network From b6f3c97a6a995a174cc7489d7ccf4895729cc704 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 3 Feb 2025 21:59:39 +0400 Subject: [PATCH 10/44] update tests for linear2d layer --- test/test_linear2d_layer.f90 | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index e193d704..f07fa115 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -12,6 +12,7 @@ program test_linear2d_layer type(linear2d_layer) :: linear linear = linear2d_layer(batch_size=2, sequence_length=3, in_features=4, out_features=1) + call linear % init([4]) call test_linear2d_layer_forward(linear, ok, sample_input) call test_linear2d_layer_backward(linear, ok, sample_input, sample_gradient) From 541d943f7afe2e54cb22d6842295fcd86ee1b9fd Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 3 Feb 2025 22:01:50 +0400 Subject: [PATCH 11/44] remove extra comment --- src/nf/nf_linear2d_layer.f90 | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index 4b1204a4..f2357b53 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -61,8 +61,6 @@ module function linear2d_layer_cons(& res % out_features = out_features res % sequence_length = sequence_length res % batch_size = batch_size - -! call res % init([1]) end function linear2d_layer_cons module subroutine init(self, input_shape) From a27ec090a35df6fbad6cff844c574ce973d62a57 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 3 Feb 2025 22:03:10 +0400 Subject: [PATCH 12/44] remove rubbish --- src/nf/nf_layer_submodule.f90 | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 4c176131..e9deb956 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -123,10 +123,6 @@ pure module subroutine backward_3d(self, previous, gradient) select type(prev_layer => previous % p) type is(input3d_layer) call this_layer % backward(prev_layer % output, gradient) -! type is(dense_layer) -! call this_layer % forward(prev_layer % output) -! type is(flatten_layer) -! 
call this_layer % backward(prev_layer % output, gradient) end select end select From 79abce38ace6350aa60ea6c68cd5a0c69447b45d Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Tue, 4 Feb 2025 17:47:00 +0400 Subject: [PATCH 13/44] move linear2d layer logic into submodule --- src/nf/nf_linear2d_layer.f90 | 151 +++++-------------------- src/nf/nf_linear2d_layer_submodule.f90 | 127 +++++++++++++++++++++ 2 files changed, 154 insertions(+), 124 deletions(-) create mode 100644 src/nf/nf_linear2d_layer_submodule.f90 diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index f2357b53..6e8d082c 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -31,9 +31,10 @@ module nf_linear2d_layer end type linear2d_layer interface linear2d_layer - module function linear2d_layer_cons(in_features, out_features) & - result(res) - integer, intent(in) :: in_features, out_features + module function linear2d_layer_cons(& + batch_size, sequence_length, in_features, out_features& + ) result(res) + integer, intent(in) :: batch_size, sequence_length, in_features, out_features type(linear2d_layer) :: res end function linear2d_layer_cons end interface linear2d_layer @@ -44,133 +45,35 @@ pure module subroutine forward(self, input) real, intent(in) :: input(:, :, :) end subroutine forward + pure module subroutine backward(self, input, gradient) + class(linear2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :, :) + real, intent(in) :: gradient(:, :, :) + end subroutine backward + module subroutine init(self, input_shape) class(linear2d_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) end subroutine init - end interface - -contains - module function linear2d_layer_cons(& - batch_size, sequence_length, in_features, out_features& - ) result(res) - integer, intent(in) :: batch_size, sequence_length, in_features, out_features - type(linear2d_layer) :: res - - res % in_features = in_features - res % out_features = out_features - res % sequence_length = sequence_length - res % batch_size = batch_size - end function linear2d_layer_cons - - module subroutine init(self, input_shape) - class(linear2d_layer), intent(in out) :: self - integer, intent(in) :: input_shape(:) - - allocate(self % output(self % batch_size, self % sequence_length, self % out_features)) - allocate(self % gradient(self % batch_size, self % sequence_length, self % in_features)) - - allocate(self % weights(self % in_features, self % out_features)) - self % weights = 0.1 - - allocate(self % biases(self % out_features)) - self%biases = 0.11 - - allocate(self % dw(self % in_features, self % out_features)) - self % dw = 0.0 - allocate(self % db(self % out_features)) - self % db = 0.0 - end subroutine init - - pure module subroutine forward(self, input) - class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :, :) - integer :: i, j - - do concurrent(i = 1: self % batch_size) - self % output(i, :, :) = matmul(input(i, :, :), self % weights) - end do - do concurrent(i = 1: self % batch_size, j = 1: self % sequence_length) - self % output(i, j, :) = self % output(i, j, :) + self % biases - end do - end subroutine forward - - pure module subroutine backward(self, input, gradient) - class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :, :) - real, intent(in) :: gradient(:, :, :) - real :: db(self % out_features) - real :: dw(self % in_features, self % out_features) - integer :: i - - do concurrent(i = 1: self % batch_size) - self % dw = self % 
dw + matmul(transpose(input(i, :, :)), gradient(i, :, :)) - self % db = self % db + sum(gradient(i, :, :), 1) - self % gradient(i, :, :) = matmul(gradient(i, :, :), transpose(self % weights)) - end do - end subroutine backward - - pure module function get_num_params(self) result(num_params) - class(linear2d_layer), intent(in) :: self - integer :: num_params - - ! Number of weigths times number of biases - num_params = self % in_features * self % out_features + self % out_features - - end function get_num_params + pure module function get_num_params(self) result(num_params) + class(linear2d_layer), intent(in) :: self + integer :: num_params + end function get_num_params - module function get_params(self) result(params) - class(linear2d_layer), intent(in), target :: self - real, allocatable :: params(:) + module function get_params(self) result(params) + class(linear2d_layer), intent(in), target :: self + real, allocatable :: params(:) + end function get_params - real, pointer :: w_(:) => null() + module function get_gradients(self) result(gradients) + class(linear2d_layer), intent(in), target :: self + real, allocatable :: gradients(:) + end function get_gradients - w_(1:size(self % weights)) => self % weights - - params = [ & - w_, & - self % biases & - ] - - end function get_params - - - module function get_gradients(self) result(gradients) - class(linear2d_layer), intent(in), target :: self - real, allocatable :: gradients(:) - - real, pointer :: dw_(:) => null() - - dw_(1:size(self % dw)) => self % dw - - gradients = [ & - dw_, & - self % db & - ] - - end function get_gradients - - - module subroutine set_params(self, params) - class(linear2d_layer), intent(in out) :: self - real, intent(in), target :: params(:) - - real, pointer :: p_(:,:) => null() - - ! check if the number of parameters is correct - if (size(params) /= self % get_num_params()) then - error stop 'Error: number of parameters does not match' - end if - - associate(n => self % in_features * self % out_features) - ! reshape the weights - p_(1:self % in_features, 1:self % out_features) => params(1 : n) - self % weights = p_ - - ! 
reshape the biases - self % biases = params(n + 1 : n + self % out_features) - end associate - - end subroutine set_params + module subroutine set_params(self, params) + class(linear2d_layer), intent(in out) :: self + real, intent(in), target :: params(:) + end subroutine set_params + end interface end module nf_linear2d_layer diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90 new file mode 100644 index 00000000..7b63277d --- /dev/null +++ b/src/nf/nf_linear2d_layer_submodule.f90 @@ -0,0 +1,127 @@ +submodule(nf_linear2d_layer) nf_linear2d_layer_submodule + use nf_base_layer, only: base_layer + implicit none +contains + module function linear2d_layer_cons(& + batch_size, sequence_length, in_features, out_features& + ) result(res) + integer, intent(in) :: batch_size, sequence_length, in_features, out_features + type(linear2d_layer) :: res + + res % in_features = in_features + res % out_features = out_features + res % sequence_length = sequence_length + res % batch_size = batch_size + end function linear2d_layer_cons + + module subroutine init(self, input_shape) + class(linear2d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + allocate(self % output(self % batch_size, self % sequence_length, self % out_features)) + allocate(self % gradient(self % batch_size, self % sequence_length, self % in_features)) + + allocate(self % weights(self % in_features, self % out_features)) + self % weights = 0.1 + + allocate(self % biases(self % out_features)) + self%biases = 0.11 + + allocate(self % dw(self % in_features, self % out_features)) + self % dw = 0.0 + allocate(self % db(self % out_features)) + self % db = 0.0 + end subroutine init + + pure module subroutine forward(self, input) + class(linear2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :, :) + integer :: i, j + + do concurrent(i = 1: self % batch_size) + self % output(i, :, :) = matmul(input(i, :, :), self % weights) + end do + do concurrent(i = 1: self % batch_size, j = 1: self % sequence_length) + self % output(i, j, :) = self % output(i, j, :) + self % biases + end do + end subroutine forward + + pure module subroutine backward(self, input, gradient) + class(linear2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :, :) + real, intent(in) :: gradient(:, :, :) + real :: db(self % out_features) + real :: dw(self % in_features, self % out_features) + integer :: i + + do concurrent(i = 1: self % batch_size) + self % dw = self % dw + matmul(transpose(input(i, :, :)), gradient(i, :, :)) + self % db = self % db + sum(gradient(i, :, :), 1) + self % gradient(i, :, :) = matmul(gradient(i, :, :), transpose(self % weights)) + end do + end subroutine backward + + pure module function get_num_params(self) result(num_params) + class(linear2d_layer), intent(in) :: self + integer :: num_params + + ! 
Number of weigths times number of biases + num_params = self % in_features * self % out_features + self % out_features + + end function get_num_params + + + module function get_params(self) result(params) + class(linear2d_layer), intent(in), target :: self + real, allocatable :: params(:) + + real, pointer :: w_(:) => null() + + w_(1:size(self % weights)) => self % weights + + params = [ & + w_, & + self % biases & + ] + + end function get_params + + + module function get_gradients(self) result(gradients) + class(linear2d_layer), intent(in), target :: self + real, allocatable :: gradients(:) + + real, pointer :: dw_(:) => null() + + dw_(1:size(self % dw)) => self % dw + + gradients = [ & + dw_, & + self % db & + ] + + end function get_gradients + + + module subroutine set_params(self, params) + class(linear2d_layer), intent(in out) :: self + real, intent(in), target :: params(:) + + real, pointer :: p_(:,:) => null() + + ! check if the number of parameters is correct + if (size(params) /= self % get_num_params()) then + error stop 'Error: number of parameters does not match' + end if + + associate(n => self % in_features * self % out_features) + ! reshape the weights + p_(1:self % in_features, 1:self % out_features) => params(1 : n) + self % weights = p_ + + ! reshape the biases + self % biases = params(n + 1 : n + self % out_features) + end associate + + end subroutine set_params +end submodule nf_linear2d_layer_submodule \ No newline at end of file From 2168ec9a7d14814b55c3b6104067a0f60610b3c3 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Tue, 4 Feb 2025 17:52:23 +0400 Subject: [PATCH 14/44] update cmake for linear2d_layer --- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a0a1be4..fc2ddfcb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,6 +38,8 @@ add_library(neural-fortran src/nf/nf_layer_constructors_submodule.f90 src/nf/nf_layer.f90 src/nf/nf_layer_submodule.f90 + src/nf/nf_linear2d_layer.f90 + src/nf/nf_linear2d_layer_submodule.f90 src/nf/nf_loss.f90 src/nf/nf_loss_submodule.f90 src/nf/nf_maxpool2d_layer.f90 From 9a13af30e75a13b69551e6d29051a14707ebf60d Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Wed, 5 Feb 2025 10:57:46 +0400 Subject: [PATCH 15/44] update tests for linear2d_layer --- test/test_linear2d_layer.f90 | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index f07fa115..1a674d38 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -8,7 +8,7 @@ program test_linear2d_layer [0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2,& 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2],& [2, 3, 4]) ! 
first batch are 0.1, second 0.2 - real :: sample_gradient(2, 3, 1) = reshape([2., 2., 2., 2., 2., 2.], [2, 3, 1]) + real :: sample_gradient(2, 3, 1) = reshape([2., 2., 2., 3., 3., 3.], [2, 3, 1]) type(linear2d_layer) :: linear linear = linear2d_layer(batch_size=2, sequence_length=3, in_features=4, out_features=1) @@ -54,12 +54,18 @@ subroutine test_linear2d_layer_backward(linear, ok, input, gradient) real :: expected_gradient_shape(3) = [2, 3, 4] real :: expected_dw_shape(2) = [4, 1] real :: expected_db_shape(1) = [1] - real :: expected_gradient_flat(24) + real :: expected_gradient_flat(24) = [& + 0.200000003, 0.200000003, 0.200000003, 0.300000012,& + 0.300000012, 0.300000012, 0.200000003, 0.200000003,& + 0.200000003, 0.300000012, 0.300000012, 0.300000012,& + 0.200000003, 0.200000003, 0.200000003, 0.300000012,& + 0.300000012, 0.300000012, 0.200000003, 0.200000003,& + 0.200000003, 0.300000012, 0.300000012, 0.300000012& + ] real :: expected_dw_flat(4) - real :: expected_db(1) = [12.0] + real :: expected_db(1) = [15.0] - expected_gradient_flat = 0.200000003 - expected_dw_flat = 1.80000007 + expected_dw_flat = 2.29999995 call linear % backward(input, gradient) From 0db76db4fa69d640dab603f77ecfa0881cf075df Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Wed, 5 Feb 2025 11:46:40 +0400 Subject: [PATCH 16/44] update linear2d_layer tests --- test/test_linear2d_layer.f90 | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index 1a674d38..34613d61 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -8,7 +8,7 @@ program test_linear2d_layer [0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2,& 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2],& [2, 3, 4]) ! 
first batch are 0.1, second 0.2 - real :: sample_gradient(2, 3, 1) = reshape([2., 2., 2., 3., 3., 3.], [2, 3, 1]) + real :: sample_gradient(2, 3, 1) = reshape([2., 3., 2., 3., 2., 3.], [2, 3, 1]) type(linear2d_layer) :: linear linear = linear2d_layer(batch_size=2, sequence_length=3, in_features=4, out_features=1) @@ -55,17 +55,17 @@ subroutine test_linear2d_layer_backward(linear, ok, input, gradient) real :: expected_dw_shape(2) = [4, 1] real :: expected_db_shape(1) = [1] real :: expected_gradient_flat(24) = [& - 0.200000003, 0.200000003, 0.200000003, 0.300000012,& - 0.300000012, 0.300000012, 0.200000003, 0.200000003,& - 0.200000003, 0.300000012, 0.300000012, 0.300000012,& - 0.200000003, 0.200000003, 0.200000003, 0.300000012,& - 0.300000012, 0.300000012, 0.200000003, 0.200000003,& - 0.200000003, 0.300000012, 0.300000012, 0.300000012& + 0.200000003, 0.300000012, 0.200000003, 0.300000012,& + 0.200000003, 0.300000012, 0.200000003, 0.300000012,& + 0.200000003, 0.300000012, 0.200000003, 0.300000012,& + 0.200000003, 0.300000012, 0.200000003, 0.300000012,& + 0.200000003, 0.300000012, 0.200000003, 0.300000012,& + 0.200000003, 0.300000012, 0.200000003, 0.300000012& ] real :: expected_dw_flat(4) real :: expected_db(1) = [15.0] - expected_dw_flat = 2.29999995 + expected_dw_flat = 2.40000010 call linear % backward(input, gradient) From f28ecc0ffc5fcec01ba2419e738736f9ff0cd5ef Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Wed, 5 Feb 2025 12:00:24 +0400 Subject: [PATCH 17/44] update linear2d_layer tests for batch last --- test/test_linear2d_layer.f90 | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index 34613d61..97bd4d34 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -4,14 +4,14 @@ program test_linear2d_layer implicit none logical :: ok = .true. - real :: sample_input(2, 3, 4) = reshape(& - [0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2,& - 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2],& - [2, 3, 4]) ! first batch are 0.1, second 0.2 - real :: sample_gradient(2, 3, 1) = reshape([2., 3., 2., 3., 2., 3.], [2, 3, 1]) + real :: sample_input(3, 4, 2) = reshape(& + [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,& + 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],& + [3, 4, 2]) ! 
first batch are 0.1, second 0.2 + real :: sample_gradient(3, 1, 2) = reshape([2., 2., 2., 3., 3., 3.], [3, 1, 2]) type(linear2d_layer) :: linear - linear = linear2d_layer(batch_size=2, sequence_length=3, in_features=4, out_features=1) + linear = linear2d_layer(sequence_length=3, in_features=4, out_features=1, batch_size=2) call linear % init([4]) call test_linear2d_layer_forward(linear, ok, sample_input) @@ -21,11 +21,11 @@ program test_linear2d_layer subroutine test_linear2d_layer_forward(linear, ok, input) type(linear2d_layer), intent(in out) :: linear logical, intent(in out) :: ok - real, intent(in) :: input(2, 3, 4) + real, intent(in) :: input(3, 4, 2) real :: output_shape(3) real :: output_flat(6) - real :: expected_shape(3) = [2, 3, 1] - real :: expected_output_flat(6) = [0.15, 0.19, 0.15, 0.19, 0.15, 0.19] + real :: expected_shape(3) = [3, 1, 2] + real :: expected_output_flat(6) = [0.15, 0.15, 0.15, 0.19, 0.19, 0.19] call linear % forward(input) @@ -44,23 +44,23 @@ end subroutine test_linear2d_layer_forward subroutine test_linear2d_layer_backward(linear, ok, input, gradient) type(linear2d_layer), intent(in out) :: linear logical, intent(in out) :: ok - real, intent(in) :: input(2, 3, 4) - real, intent(in) :: gradient(2, 3, 1) + real, intent(in) :: input(3, 4, 2) + real, intent(in) :: gradient(3, 1, 2) real :: gradient_shape(3) real :: dw_shape(2) real :: db_shape(1) real :: gradient_flat(24) real :: dw_flat(4) - real :: expected_gradient_shape(3) = [2, 3, 4] + real :: expected_gradient_shape(3) = [3, 4, 2] real :: expected_dw_shape(2) = [4, 1] real :: expected_db_shape(1) = [1] real :: expected_gradient_flat(24) = [& - 0.200000003, 0.300000012, 0.200000003, 0.300000012,& - 0.200000003, 0.300000012, 0.200000003, 0.300000012,& - 0.200000003, 0.300000012, 0.200000003, 0.300000012,& - 0.200000003, 0.300000012, 0.200000003, 0.300000012,& - 0.200000003, 0.300000012, 0.200000003, 0.300000012,& - 0.200000003, 0.300000012, 0.200000003, 0.300000012& + 0.200000003, 0.200000003, 0.200000003, 0.200000003,& + 0.200000003, 0.200000003, 0.200000003, 0.200000003,& + 0.200000003, 0.200000003, 0.200000003, 0.200000003,& + 0.300000012, 0.300000012, 0.300000012, 0.300000012,& + 0.300000012, 0.300000012, 0.300000012, 0.300000012,& + 0.300000012, 0.300000012, 0.300000012, 0.300000012& ] real :: expected_dw_flat(4) real :: expected_db(1) = [15.0] From 9386aa35d3df0f65c0d710f8ad330dde4a525bf1 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Wed, 5 Feb 2025 12:01:40 +0400 Subject: [PATCH 18/44] make linear2d_layer with batch as last dimension (performance) --- src/nf/nf_layer_constructors.f90 | 2 +- src/nf/nf_layer_constructors_submodule.f90 | 6 +++--- src/nf/nf_linear2d_layer.f90 | 4 ++-- src/nf/nf_linear2d_layer_submodule.f90 | 16 ++++++++-------- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index bc22ed9d..40480e85 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -185,7 +185,7 @@ module function reshape(output_shape) result(res) !! 
Resulting layer instance end function reshape - module function linear2d(batch_size, sequence_length, in_features, out_features) result(res) + module function linear2d(sequence_length, in_features, out_features, batch_size) result(res) integer, intent(in) :: batch_size, sequence_length, in_features, out_features type(layer) :: res end function linear2d diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 7eebf50c..9bf157fa 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -149,13 +149,13 @@ module function reshape(output_shape) result(res) end function reshape - module function linear2d(batch_size, sequence_length, in_features, out_features) result(res) + module function linear2d(sequence_length, in_features, out_features, batch_size) result(res) integer, intent(in) :: batch_size, sequence_length, in_features, out_features type(layer) :: res res % name = 'linear2d' - res % layer_shape = [batch_size, sequence_length, out_features] - allocate(res % p, source=linear2d_layer(batch_size, sequence_length, in_features, out_features)) + res % layer_shape = [sequence_length, out_features, batch_size] + allocate(res % p, source=linear2d_layer(sequence_length, in_features, out_features, batch_size)) end function linear2d end submodule nf_layer_constructors_submodule diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index 6e8d082c..29069ae6 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -9,7 +9,7 @@ module nf_linear2d_layer public :: linear2d_layer type, extends(base_layer) :: linear2d_layer - integer :: batch_size, sequence_length, in_features, out_features + integer :: sequence_length, in_features, out_features, batch_size real, allocatable :: weights(:, :) real, allocatable :: biases(:) @@ -32,7 +32,7 @@ module nf_linear2d_layer interface linear2d_layer module function linear2d_layer_cons(& - batch_size, sequence_length, in_features, out_features& + sequence_length, in_features, out_features, batch_size& ) result(res) integer, intent(in) :: batch_size, sequence_length, in_features, out_features type(linear2d_layer) :: res diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90 index 7b63277d..a94b18df 100644 --- a/src/nf/nf_linear2d_layer_submodule.f90 +++ b/src/nf/nf_linear2d_layer_submodule.f90 @@ -3,7 +3,7 @@ implicit none contains module function linear2d_layer_cons(& - batch_size, sequence_length, in_features, out_features& + sequence_length, in_features, out_features, batch_size& ) result(res) integer, intent(in) :: batch_size, sequence_length, in_features, out_features type(linear2d_layer) :: res @@ -18,8 +18,8 @@ module subroutine init(self, input_shape) class(linear2d_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) - allocate(self % output(self % batch_size, self % sequence_length, self % out_features)) - allocate(self % gradient(self % batch_size, self % sequence_length, self % in_features)) + allocate(self % output(self % sequence_length, self % out_features, self % batch_size)) + allocate(self % gradient(self % sequence_length, self % in_features, self % batch_size)) allocate(self % weights(self % in_features, self % out_features)) self % weights = 0.1 @@ -39,10 +39,10 @@ pure module subroutine forward(self, input) integer :: i, j do concurrent(i = 1: self % batch_size) - self % output(i, :, :) = matmul(input(i, :, :), self % weights) + self % output(:, :, i) = 
matmul(input(:, :, i), self % weights) end do do concurrent(i = 1: self % batch_size, j = 1: self % sequence_length) - self % output(i, j, :) = self % output(i, j, :) + self % biases + self % output(j, :, i) = self % output(j, :, i) + self % biases end do end subroutine forward @@ -55,9 +55,9 @@ pure module subroutine backward(self, input, gradient) integer :: i do concurrent(i = 1: self % batch_size) - self % dw = self % dw + matmul(transpose(input(i, :, :)), gradient(i, :, :)) - self % db = self % db + sum(gradient(i, :, :), 1) - self % gradient(i, :, :) = matmul(gradient(i, :, :), transpose(self % weights)) + self % dw = self % dw + matmul(transpose(input(:, :, i)), gradient(:, :, i)) + self % db = self % db + sum(gradient(:, :, i), 1) + self % gradient(:, :, i) = matmul(gradient(:, :, i), transpose(self % weights)) end do end subroutine backward From 07750db831290d32c89853f8cfebd27ba1644b5d Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Wed, 12 Feb 2025 23:36:34 +0400 Subject: [PATCH 19/44] linear2d_layer: fix gradient updates --- src/nf/nf_linear2d_layer_submodule.f90 | 4 +- test/test_linear2d_layer.f90 | 67 ++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90 index a94b18df..7e615df5 100644 --- a/src/nf/nf_linear2d_layer_submodule.f90 +++ b/src/nf/nf_linear2d_layer_submodule.f90 @@ -77,7 +77,7 @@ module function get_params(self) result(params) real, pointer :: w_(:) => null() - w_(1:size(self % weights)) => self % weights + w_(1: product(shape(self % weights))) => self % weights params = [ & w_, & @@ -93,7 +93,7 @@ module function get_gradients(self) result(gradients) real, pointer :: dw_(:) => null() - dw_(1:size(self % dw)) => self % dw + dw_(1: product(shape(self % dw))) => self % dw gradients = [ & dw_, & diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index 97bd4d34..27a8f035 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -16,6 +16,7 @@ program test_linear2d_layer call test_linear2d_layer_forward(linear, ok, sample_input) call test_linear2d_layer_backward(linear, ok, sample_input, sample_gradient) + call test_linear2d_layer_gradient_updates(ok) contains subroutine test_linear2d_layer_forward(linear, ok, input) @@ -100,4 +101,70 @@ subroutine test_linear2d_layer_backward(linear, ok, input, gradient) write(stderr, '(a)') 'backward returned incorrect db values.. 
failed' end if end subroutine test_linear2d_layer_backward + + subroutine test_linear2d_layer_gradient_updates(ok) + logical, intent(in out) :: ok + real :: input(3, 4, 1) = reshape([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.11, 0.12], [3, 4, 1]) + real :: gradient(3, 2, 1) = reshape([0.0, 10., 0.2, 3., 0.4, 1.], [3, 2, 1]) + type(linear2d_layer) :: linear + + integer :: num_parameters + real :: parameters(10) + real :: expected_parameters(10) = [& + 0.100000001, 0.100000001, 0.100000001, 0.100000001, 0.100000001, 0.100000001, 0.100000001, 0.100000001,& + 0.109999999, 0.109999999& + ] + real :: gradients(10) + real :: expected_gradients(10) = [& + 1.03999996, 4.09999990, 7.15999985, 1.12400007, 0.240000010, 1.56000006, 2.88000011, 2.86399961,& + 10.1999998, 4.40000010& + ] + real :: updated_parameters(10) + real :: updated_weights(8) + real :: updated_biases(2) + real :: expected_weights(8) = [& + 0.203999996, 0.509999990, 0.816000044, 0.212400019, 0.124000005, 0.256000012, 0.388000011, 0.386399955& + ] + real :: expected_biases(2) = [1.13000000, 0.550000012] + + integer :: i + + linear = linear2d_layer(sequence_length=3, in_features=4, out_features=2, batch_size=1) + call linear % init([4]) + call linear % forward(input) + call linear % backward(input, gradient) + + num_parameters = linear % get_num_params() + if (num_parameters /= 10) then + ok = .false. + write(stderr, '(a)') 'incorrect number of parameters.. failed' + end if + + parameters = linear % get_params() + if (.not. all(parameters.eq.expected_parameters)) then + ok = .false. + write(stderr, '(a)') 'incorrect parameters.. failed' + end if + + gradients = linear % get_gradients() + if (.not. all(gradients.eq.expected_gradients)) then + ok = .false. + write(stderr, '(a)') 'incorrect gradients.. failed' + end if + + do i = 1, num_parameters + updated_parameters(i) = parameters(i) + 0.1 * gradients(i) + end do + call linear % set_params(updated_parameters) + updated_weights = reshape(linear % weights, shape(expected_weights)) + if (.not. all(updated_weights.eq.expected_weights)) then + ok = .false. + write(stderr, '(a)') 'incorrect updated weights.. failed' + end if + updated_biases = linear % biases + if (.not. all(updated_biases.eq.expected_biases)) then + ok = .false. + write(stderr, '(a)') 'incorrect updated biases.. failed' + end if + end subroutine test_linear2d_layer_gradient_updates end program test_linear2d_layer \ No newline at end of file From b5a600a0bb9d8005aecac0fdf31abf548294f843 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Fri, 14 Feb 2025 23:57:16 +0400 Subject: [PATCH 20/44] linear2d_layer: make it 2d --- src/nf/nf_linear2d_layer.f90 | 14 +++++----- test/test_linear2d_layer.f90 | 52 ++++++++++++++++-------------------- 2 files changed, 30 insertions(+), 36 deletions(-) diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index 29069ae6..60c2b0eb 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -13,8 +13,8 @@ module nf_linear2d_layer real, allocatable :: weights(:, :) real, allocatable :: biases(:) - real, allocatable :: output(:, :, :) - real, allocatable :: gradient(:, :, :) ! input gradient + real, allocatable :: output(:, :) + real, allocatable :: gradient(:, :) ! input gradient real, allocatable :: dw(:, :) ! weight gradients real, allocatable :: db(:) ! 
bias gradients @@ -32,9 +32,9 @@ module nf_linear2d_layer interface linear2d_layer module function linear2d_layer_cons(& - sequence_length, in_features, out_features, batch_size& + sequence_length, in_features, out_features& ) result(res) - integer, intent(in) :: batch_size, sequence_length, in_features, out_features + integer, intent(in) :: sequence_length, in_features, out_features type(linear2d_layer) :: res end function linear2d_layer_cons end interface linear2d_layer @@ -42,13 +42,13 @@ end function linear2d_layer_cons interface pure module subroutine forward(self, input) class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :, :) + real, intent(in) :: input(:, :) end subroutine forward pure module subroutine backward(self, input, gradient) class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :, :) - real, intent(in) :: gradient(:, :, :) + real, intent(in) :: input(:, :) + real, intent(in) :: gradient(:, :) end subroutine backward module subroutine init(self, input_shape) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index 27a8f035..7a383e3d 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -4,14 +4,13 @@ program test_linear2d_layer implicit none logical :: ok = .true. - real :: sample_input(3, 4, 2) = reshape(& - [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,& - 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],& - [3, 4, 2]) ! first batch are 0.1, second 0.2 - real :: sample_gradient(3, 1, 2) = reshape([2., 2., 2., 3., 3., 3.], [3, 1, 2]) + real :: sample_input(3, 4) = reshape(& + [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],& + [3, 4]) ! first batch are 0.1, second 0.2 + real :: sample_gradient(3, 1) = reshape([2., 2., 3.], [3, 1]) type(linear2d_layer) :: linear - linear = linear2d_layer(sequence_length=3, in_features=4, out_features=1, batch_size=2) + linear = linear2d_layer(sequence_length=3, in_features=4, out_features=1) call linear % init([4]) call test_linear2d_layer_forward(linear, ok, sample_input) @@ -22,11 +21,11 @@ program test_linear2d_layer subroutine test_linear2d_layer_forward(linear, ok, input) type(linear2d_layer), intent(in out) :: linear logical, intent(in out) :: ok - real, intent(in) :: input(3, 4, 2) - real :: output_shape(3) - real :: output_flat(6) - real :: expected_shape(3) = [3, 1, 2] - real :: expected_output_flat(6) = [0.15, 0.15, 0.15, 0.19, 0.19, 0.19] + real, intent(in) :: input(3, 4) + real :: output_shape(2) + real :: output_flat(3) + real :: expected_shape(2) = [3, 1] + real :: expected_output_flat(3) = [0.17, 0.17, 0.17] call linear % forward(input) @@ -45,28 +44,23 @@ end subroutine test_linear2d_layer_forward subroutine test_linear2d_layer_backward(linear, ok, input, gradient) type(linear2d_layer), intent(in out) :: linear logical, intent(in out) :: ok - real, intent(in) :: input(3, 4, 2) - real, intent(in) :: gradient(3, 1, 2) - real :: gradient_shape(3) + real, intent(in) :: input(3, 4) + real, intent(in) :: gradient(3, 1) + real :: gradient_shape(2) real :: dw_shape(2) real :: db_shape(1) - real :: gradient_flat(24) + real :: gradient_flat(12) real :: dw_flat(4) - real :: expected_gradient_shape(3) = [3, 4, 2] + real :: expected_gradient_shape(2) = [3, 4] real :: expected_dw_shape(2) = [4, 1] real :: expected_db_shape(1) = [1] - real :: expected_gradient_flat(24) = [& - 0.200000003, 0.200000003, 0.200000003, 0.200000003,& - 0.200000003, 0.200000003, 0.200000003, 0.200000003,& - 0.200000003, 
0.200000003, 0.200000003, 0.200000003,& - 0.300000012, 0.300000012, 0.300000012, 0.300000012,& - 0.300000012, 0.300000012, 0.300000012, 0.300000012,& - 0.300000012, 0.300000012, 0.300000012, 0.300000012& + real :: expected_gradient_flat(12) = [& + 0.2, 0.2, 0.3, 0.2,& + 0.2, 0.3, 0.2, 0.2,& + 0.3, 0.2, 0.2, 0.3& ] - real :: expected_dw_flat(4) - real :: expected_db(1) = [15.0] - - expected_dw_flat = 2.40000010 + real :: expected_dw_flat(4) = [0.7, 0.7, 1.4, 1.4] + real :: expected_db(1) = [7] call linear % backward(input, gradient) @@ -104,8 +98,8 @@ end subroutine test_linear2d_layer_backward subroutine test_linear2d_layer_gradient_updates(ok) logical, intent(in out) :: ok - real :: input(3, 4, 1) = reshape([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.11, 0.12], [3, 4, 1]) - real :: gradient(3, 2, 1) = reshape([0.0, 10., 0.2, 3., 0.4, 1.], [3, 2, 1]) + real :: input(3, 4) = reshape([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.11, 0.12], [3, 4]) + real :: gradient(3, 2) = reshape([0.0, 10., 0.2, 3., 0.4, 1.], [3, 2]) type(linear2d_layer) :: linear integer :: num_parameters From dd1297ea40bb115d912142fbcc8e9dc26713828b Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Fri, 14 Feb 2025 23:59:28 +0400 Subject: [PATCH 21/44] linear2d_layer: forgot a file --- src/nf/nf_linear2d_layer_submodule.f90 | 33 +++++++++++--------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90 index 7e615df5..1a513fb8 100644 --- a/src/nf/nf_linear2d_layer_submodule.f90 +++ b/src/nf/nf_linear2d_layer_submodule.f90 @@ -3,23 +3,22 @@ implicit none contains module function linear2d_layer_cons(& - sequence_length, in_features, out_features, batch_size& + sequence_length, in_features, out_features& ) result(res) - integer, intent(in) :: batch_size, sequence_length, in_features, out_features + integer, intent(in) :: sequence_length, in_features, out_features type(linear2d_layer) :: res res % in_features = in_features res % out_features = out_features res % sequence_length = sequence_length - res % batch_size = batch_size end function linear2d_layer_cons module subroutine init(self, input_shape) class(linear2d_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) - allocate(self % output(self % sequence_length, self % out_features, self % batch_size)) - allocate(self % gradient(self % sequence_length, self % in_features, self % batch_size)) + allocate(self % output(self % sequence_length, self % out_features)) + allocate(self % gradient(self % sequence_length, self % in_features)) allocate(self % weights(self % in_features, self % out_features)) self % weights = 0.1 @@ -35,30 +34,26 @@ end subroutine init pure module subroutine forward(self, input) class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :, :) - integer :: i, j + real, intent(in) :: input(:, :) + integer :: i - do concurrent(i = 1: self % batch_size) - self % output(:, :, i) = matmul(input(:, :, i), self % weights) - end do - do concurrent(i = 1: self % batch_size, j = 1: self % sequence_length) - self % output(j, :, i) = self % output(j, :, i) + self % biases + self % output(:, :) = matmul(input(:, :), self % weights) + do concurrent(i = 1: self % sequence_length) + self % output(i, :) = self % output(i, :) + self % biases end do end subroutine forward pure module subroutine backward(self, input, gradient) class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :, :) - real, intent(in) 
:: gradient(:, :, :) + real, intent(in) :: input(:, :) + real, intent(in) :: gradient(:, :) real :: db(self % out_features) real :: dw(self % in_features, self % out_features) integer :: i - do concurrent(i = 1: self % batch_size) - self % dw = self % dw + matmul(transpose(input(:, :, i)), gradient(:, :, i)) - self % db = self % db + sum(gradient(:, :, i), 1) - self % gradient(:, :, i) = matmul(gradient(:, :, i), transpose(self % weights)) - end do + self % dw = self % dw + matmul(transpose(input(:, :)), gradient(:, :)) + self % db = self % db + sum(gradient(:, :), 1) + self % gradient(:, :) = matmul(gradient(:, :), transpose(self % weights)) end subroutine backward pure module function get_num_params(self) result(num_params) From e4cb526295cc3b61b3611ec1f64d13443c774cf9 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sat, 15 Feb 2025 00:01:36 +0400 Subject: [PATCH 22/44] linear2d_layer: temporarily remove api --- src/nf/nf_layer_constructors.f90 | 4 +- src/nf/nf_layer_constructors_submodule.f90 | 8 ++-- src/nf/nf_layer_submodule.f90 | 48 +++++++++++----------- src/nf/nf_network_submodule.f90 | 4 +- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index 40480e85..ea923442 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -185,8 +185,8 @@ module function reshape(output_shape) result(res) !! Resulting layer instance end function reshape - module function linear2d(sequence_length, in_features, out_features, batch_size) result(res) - integer, intent(in) :: batch_size, sequence_length, in_features, out_features + module function linear2d(sequence_length, in_features, out_features) result(res) + integer, intent(in) :: sequence_length, in_features, out_features type(layer) :: res end function linear2d diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 9bf157fa..52a9e5ab 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -149,13 +149,13 @@ module function reshape(output_shape) result(res) end function reshape - module function linear2d(sequence_length, in_features, out_features, batch_size) result(res) - integer, intent(in) :: batch_size, sequence_length, in_features, out_features + module function linear2d(sequence_length, in_features, out_features) result(res) + integer, intent(in) :: sequence_length, in_features, out_features type(layer) :: res res % name = 'linear2d' - res % layer_shape = [sequence_length, out_features, batch_size] - allocate(res % p, source=linear2d_layer(sequence_length, in_features, out_features, batch_size)) + res % layer_shape = [sequence_length, out_features] + allocate(res % p, source=linear2d_layer(sequence_length, in_features, out_features)) end function linear2d end submodule nf_layer_constructors_submodule diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index e9deb956..f46ece84 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -48,8 +48,8 @@ pure module subroutine backward_1d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) type is(maxpool2d_layer) call this_layer % backward(prev_layer % output, gradient) - type is(linear2d_layer) - call this_layer % backward(prev_layer % output, gradient) +! type is(linear2d_layer) +! 
call this_layer % backward(prev_layer % output, gradient) end select end select @@ -119,11 +119,11 @@ pure module subroutine backward_3d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) end select - type is(linear2d_layer) - select type(prev_layer => previous % p) - type is(input3d_layer) - call this_layer % backward(prev_layer % output, gradient) - end select +! type is(linear2d_layer) +! select type(prev_layer => previous % p) +! type is(input3d_layer) +! call this_layer % backward(prev_layer % output, gradient) +! end select end select @@ -191,8 +191,8 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) type is(reshape3d_layer) call this_layer % forward(prev_layer % output) - type is(linear2d_layer) - call this_layer % forward(prev_layer % output) +! type is(linear2d_layer) +! call this_layer % forward(prev_layer % output) end select type is(reshape3d_layer) @@ -207,13 +207,13 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) end select - type is(linear2d_layer) - select type(prev_layer => input % p) - type is(input3d_layer) - call this_layer % forward(prev_layer % output) - type is(linear2d_layer) - call this_layer % forward(prev_layer % output) - end select +! type is(linear2d_layer) +! select type(prev_layer => input % p) +! type is(input3d_layer) +! call this_layer % forward(prev_layer % output) +! type is(linear2d_layer) +! call this_layer % forward(prev_layer % output) +! end select end select @@ -347,8 +347,8 @@ elemental module function get_num_params(self) result(num_params) num_params = 0 type is (reshape3d_layer) num_params = 0 - type is (linear2d_layer) - num_params = this_layer % get_num_params() +! type is (linear2d_layer) +! num_params = this_layer % get_num_params() class default error stop 'Unknown layer type.' end select @@ -376,8 +376,8 @@ module function get_params(self) result(params) ! No parameters to get. type is (reshape3d_layer) ! No parameters to get. - type is (linear2d_layer) - params = this_layer % get_params() +! type is (linear2d_layer) +! params = this_layer % get_params() class default error stop 'Unknown layer type.' end select @@ -405,8 +405,8 @@ module function get_gradients(self) result(gradients) ! No gradients to get. type is (reshape3d_layer) ! No gradients to get. - type is (linear2d_layer) - gradients = this_layer % get_gradients() +! type is (linear2d_layer) +! gradients = this_layer % get_gradients() class default error stop 'Unknown layer type.' end select @@ -472,8 +472,8 @@ module subroutine set_params(self, params) class default error stop 'Unknown layer type.' - type is (linear2d_layer) - call this_layer % set_params(params) +! type is (linear2d_layer) +! call this_layer % set_params(params) end select end subroutine set_params diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 57244046..434ef836 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -157,8 +157,8 @@ module subroutine backward(self, output, loss) type is(reshape3d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) - type is(linear2d_layer) - call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) +! type is(linear2d_layer) +! 
call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) end select end if From 86ec62873e658674416225f7c85dce73b2795c80 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Sat, 15 Feb 2025 22:32:01 -0500 Subject: [PATCH 23/44] Don't expose the concrete layer type via nf --- src/nf.f90 | 1 - 1 file changed, 1 deletion(-) diff --git a/src/nf.f90 b/src/nf.f90 index 4351e201..e9b027c1 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -12,5 +12,4 @@ module nf gaussian, linear, relu, leaky_relu, & sigmoid, softmax, softplus, step, tanhf, & celu - use nf_linear2d_layer, only: linear2d_layer end module nf From d40aebb51d280e034bea857f8c67fd7d43f97dc3 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Sat, 15 Feb 2025 22:32:17 -0500 Subject: [PATCH 24/44] Report success to stdout --- test/test_linear2d_layer.f90 | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index 7a383e3d..b1f39694 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -17,7 +17,15 @@ program test_linear2d_layer call test_linear2d_layer_backward(linear, ok, sample_input, sample_gradient) call test_linear2d_layer_gradient_updates(ok) + if (ok) then + print '(a)', 'test_linear2d_layer: All tests passed.' + else + write(stderr, '(a)') 'test_linear2d_layer: One or more tests failed.' + stop 1 + end if + contains + subroutine test_linear2d_layer_forward(linear, ok, input) type(linear2d_layer), intent(in out) :: linear logical, intent(in out) :: ok @@ -161,4 +169,5 @@ subroutine test_linear2d_layer_gradient_updates(ok) write(stderr, '(a)') 'incorrect updated biases.. failed' end if end subroutine test_linear2d_layer_gradient_updates + end program test_linear2d_layer \ No newline at end of file From b80355374fd1b4d231903795aa991201bc0f3524 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Sat, 15 Feb 2025 22:32:33 -0500 Subject: [PATCH 25/44] Include linear2d test in cmake --- test/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 35954894..12236416 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2,6 +2,7 @@ foreach(execid input1d_layer input2d_layer input3d_layer + linear2d_layer parametric_activation dense_layer conv2d_layer From f1a01a67443722bbe3b3ced4c0d3c3460236b416 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Sat, 15 Feb 2025 22:34:41 -0500 Subject: [PATCH 26/44] Add Linear2d to README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d2cff5b1..ebf7704d 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ Read the paper [here](https://arxiv.org/abs/1902.06714). | Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) | | Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ | | Flatten | `flatten` | `input2d`, `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ | +| Linear (2-d) | `linear2d` | `input2d` | 2 | ✅ | ✅ | | Reshape (1-d to 3-d) | `reshape` | `input1d`, `dense`, `flatten` | 3 | ✅ | ✅ | (*) See Issue [#145](https://github.com/modern-fortran/neural-fortran/issues/145) regarding non-converging CNN training on the MNIST dataset. 
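For reference, the Linear (2-d) row added above implies usage along the following lines. This is a minimal sketch, assuming the layer constructor signature as of this point in the series (sequence_length, in_features, out_features) and the input2d/flatten plumbing that the subsequent patches wire up; the constructor arguments change again later in the series.

    program linear2d_usage_sketch
      ! Hypothetical end-to-end construction of a network containing the new
      ! linear2d layer; names follow the constructors used elsewhere in this
      ! patch series (input, linear2d, flatten, network).
      use nf, only: input, linear2d, flatten, network
      implicit none
      type(network) :: net

      net = network([ &
        input(3, 4), &        ! 2-d input: sequence_length x in_features
        linear2d(3, 4, 1), &  ! project the feature dimension from 4 to 1
        flatten() &           ! collapse back to 1-d for downstream layers
      ])

      call net % print_info()
    end program linear2d_usage_sketch
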
From b39e6dae84319df8d28ec7431c4f6560e3ba689e Mon Sep 17 00:00:00 2001 From: milancurcic Date: Sat, 15 Feb 2025 22:48:40 -0500 Subject: [PATCH 27/44] Plumbing of linear2d with input2d and linear2d --- src/nf/nf_layer_submodule.f90 | 59 +++++++++++++++++++-------------- src/nf/nf_network_submodule.f90 | 4 +-- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index f46ece84..d13362ab 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -64,8 +64,20 @@ pure module subroutine backward_2d(self, previous, gradient) real, intent(in) :: gradient(:,:) ! Backward pass from a 2-d layer downstream currently implemented - ! only for dense and flatten layers - ! CURRENTLY NO LAYERS, tbd: pull/197 and pull/199 + ! only for input2d and linear2d layers + select type(this_layer => self % p) + + type is(linear2d_layer) + + select type(prev_layer => previous % p) + type is(input2d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(linear2d_layer) + call this_layer % backward(prev_layer % output, gradient) + end select + + end select + end subroutine backward_2d @@ -119,12 +131,6 @@ pure module subroutine backward_3d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) end select -! type is(linear2d_layer) -! select type(prev_layer => previous % p) -! type is(input3d_layer) -! call this_layer % backward(prev_layer % output, gradient) -! end select - end select end subroutine backward_3d @@ -207,13 +213,15 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) end select -! type is(linear2d_layer) -! select type(prev_layer => input % p) -! type is(input3d_layer) -! call this_layer % forward(prev_layer % output) -! type is(linear2d_layer) -! call this_layer % forward(prev_layer % output) -! end select + type is(linear2d_layer) + + ! Upstream layers permitted: input2d, linear2d + select type(prev_layer => input % p) + type is(input2d_layer) + call this_layer % forward(prev_layer % output) + type is(linear2d_layer) + call this_layer % forward(prev_layer % output) + end select end select @@ -250,8 +258,10 @@ pure module subroutine get_output_2d(self, output) type is(input2d_layer) allocate(output, source=this_layer % output) + type is(linear2d_layer) + allocate(output, source=this_layer % output) class default - error stop '1-d output can only be read from an input1d, dense, or flatten layer.' + error stop '2-d output can only be read from an input2d or linear2d layer.' end select @@ -347,8 +357,8 @@ elemental module function get_num_params(self) result(num_params) num_params = 0 type is (reshape3d_layer) num_params = 0 -! type is (linear2d_layer) -! num_params = this_layer % get_num_params() + type is (linear2d_layer) + num_params = this_layer % get_num_params() class default error stop 'Unknown layer type.' end select @@ -376,8 +386,8 @@ module function get_params(self) result(params) ! No parameters to get. type is (reshape3d_layer) ! No parameters to get. -! type is (linear2d_layer) -! params = this_layer % get_params() + type is (linear2d_layer) + params = this_layer % get_params() class default error stop 'Unknown layer type.' end select @@ -405,8 +415,8 @@ module function get_gradients(self) result(gradients) ! No gradients to get. type is (reshape3d_layer) ! No gradients to get. -! type is (linear2d_layer) -! 
gradients = this_layer % get_gradients() + type is (linear2d_layer) + gradients = this_layer % get_gradients() class default error stop 'Unknown layer type.' end select @@ -454,6 +464,9 @@ module subroutine set_params(self, params) type is (conv2d_layer) call this_layer % set_params(params) + type is (linear2d_layer) + call this_layer % set_params(params) + type is (maxpool2d_layer) ! No parameters to set. write(stderr, '(a)') 'Warning: calling set_params() ' & @@ -472,8 +485,6 @@ module subroutine set_params(self, params) class default error stop 'Unknown layer type.' -! type is (linear2d_layer) -! call this_layer % set_params(params) end select end subroutine set_params diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 434ef836..57244046 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -157,8 +157,8 @@ module subroutine backward(self, output, loss) type is(reshape3d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) -! type is(linear2d_layer) -! call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) + type is(linear2d_layer) + call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) end select end if From 1bec5319a7c6a2883624e5170e5a897ee8429777 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 16 Feb 2025 16:00:35 +0400 Subject: [PATCH 28/44] linear2d_layer: add flatten2d layer --- src/nf/nf_flatten2d_layer.f90 | 75 ++++++++++++++++++ src/nf/nf_flatten2d_layer_submodule.f90 | 48 ++++++++++++ src/nf/nf_layer_constructors.f90 | 21 ++++- src/nf/nf_layer_constructors_submodule.f90 | 8 ++ test/test_flatten2d_layer.f90 | 89 ++++++++++++++++++++++ 5 files changed, 240 insertions(+), 1 deletion(-) create mode 100644 src/nf/nf_flatten2d_layer.f90 create mode 100644 src/nf/nf_flatten2d_layer_submodule.f90 create mode 100644 test/test_flatten2d_layer.f90 diff --git a/src/nf/nf_flatten2d_layer.f90 b/src/nf/nf_flatten2d_layer.f90 new file mode 100644 index 00000000..e67037f8 --- /dev/null +++ b/src/nf/nf_flatten2d_layer.f90 @@ -0,0 +1,75 @@ +module nf_flatten2d_layer + + !! This module provides the concrete flatten2d layer type. + !! It is used internally by the layer type. + !! It is not intended to be used directly by the user. + + use nf_base_layer, only: base_layer + + implicit none + + private + public :: flatten2d_layer + + type, extends(base_layer) :: flatten2d_layer + + !! Concrete implementation of a flatten2d (2-d to 1-d) layer. + + integer, allocatable :: input_shape(:) + integer :: output_size + + real, allocatable :: gradient(:,:) + real, allocatable :: output(:) + + contains + + procedure :: backward + procedure :: forward + procedure :: init + + end type flatten2d_layer + + interface flatten2d_layer + elemental module function flatten2d_layer_cons() result(res) + !! This function returns the `flatten2d_layer` instance. + type(flatten2d_layer) :: res + !! `flatten2d_layer` instance + end function flatten2d_layer_cons + end interface flatten2d_layer + + interface + + pure module subroutine backward(self, input, gradient) + !! Apply the backward pass to the flatten2d layer. + !! This is a reshape operation from 1-d gradient to 2-d input. + class(flatten2d_layer), intent(in out) :: self + !! flatten2d layer instance + real, intent(in) :: input(:,:) + !! Input from the previous layer + real, intent(in) :: gradient(:) + !! Gradient from the next layer + end subroutine backward + + pure module subroutine forward(self, input) + !! 
Propagate forward the layer. + !! Calling this subroutine updates the values of a few data components + !! of `flatten2d_layer` that are needed for the backward pass. + class(flatten2d_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: input(:,:) + !! Input from the previous layer + end subroutine forward + + module subroutine init(self, input_shape) + !! Initialize the layer data structures. + !! + !! This is a deferred procedure from the `base_layer` abstract type. + class(flatten2d_layer), intent(in out) :: self + !! Dense layer instance + integer, intent(in) :: input_shape(:) + !! Shape of the input layer + end subroutine init + + end interface + +end module nf_flatten2d_layer diff --git a/src/nf/nf_flatten2d_layer_submodule.f90 b/src/nf/nf_flatten2d_layer_submodule.f90 new file mode 100644 index 00000000..875b7374 --- /dev/null +++ b/src/nf/nf_flatten2d_layer_submodule.f90 @@ -0,0 +1,48 @@ +submodule(nf_flatten2d_layer) nf_flatten2d_layer_submodule + + !! This module provides the concrete flatten2d layer type. + !! It is used internally by the layer type. + !! It is not intended to be used directly by the user. + + use nf_base_layer, only: base_layer + + implicit none + +contains + + elemental module function flatten2d_layer_cons() result(res) + type(flatten2d_layer) :: res + end function flatten2d_layer_cons + + + pure module subroutine backward(self, input, gradient) + class(flatten2d_layer), intent(in out) :: self + real, intent(in) :: input(:,:) + real, intent(in) :: gradient(:) + self % gradient = reshape(gradient, shape(input)) + end subroutine backward + + + pure module subroutine forward(self, input) + class(flatten2d_layer), intent(in out) :: self + real, intent(in) :: input(:,:) + self % output = pack(input, .true.) + end subroutine forward + + + module subroutine init(self, input_shape) + class(flatten2d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + self % input_shape = input_shape + self % output_size = product(input_shape) + + allocate(self % gradient(input_shape(1), input_shape(2))) + self % gradient = 0 + + allocate(self % output(self % output_size)) + self % output = 0 + + end subroutine init + +end submodule nf_flatten2d_layer_submodule diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index ea923442..cc7bfe49 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -8,7 +8,7 @@ module nf_layer_constructors implicit none private - public :: conv2d, dense, flatten, input, maxpool2d, reshape, linear2d + public :: conv2d, dense, flatten, flatten2d, input, maxpool2d, reshape, linear2d interface input @@ -125,6 +125,25 @@ module function flatten() result(res) !! Resulting layer instance end function flatten + module function flatten2d() result(res) + !! Flatten (2-d -> 1-d) layer constructor. + !! + !! Use this layer to chain layers with 2-d outputs to layers with 2-d + !! inputs. + !! + !! A flatten layer must not be the first layer in the network. + !! + !! Example: + !! + !! ``` + !! use nf, only :: flatten, layer + !! type(layer) :: flatten_layer + !! flatten_layer = flatten() + !! ``` + type(layer) :: res + !! Resulting layer instance + end function flatten2d + module function conv2d(filters, kernel_size, activation) result(res) !! 2-d convolutional layer constructor. !! 
diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 52a9e5ab..ec42a1a6 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -4,6 +4,7 @@ use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer use nf_flatten_layer, only: flatten_layer + use nf_flatten2d_layer, only: flatten2d_layer use nf_input1d_layer, only: input1d_layer use nf_input2d_layer, only: input2d_layer use nf_input3d_layer, only: input3d_layer @@ -72,6 +73,13 @@ module function flatten() result(res) end function flatten + module function flatten2d() result(res) + type(layer) :: res + res % name = 'flatten2d' + allocate(res % p, source=flatten2d_layer()) + end function flatten2d + + module function input1d(layer_size) result(res) integer, intent(in) :: layer_size type(layer) :: res diff --git a/test/test_flatten2d_layer.f90 b/test/test_flatten2d_layer.f90 new file mode 100644 index 00000000..3189b4e9 --- /dev/null +++ b/test/test_flatten2d_layer.f90 @@ -0,0 +1,89 @@ +program test_flatten2d_layer + + use iso_fortran_env, only: stderr => error_unit + use nf, only: dense, flatten2d, input, layer, network + use nf_flatten2d_layer, only: flatten2d_layer + use nf_input2d_layer, only: input2d_layer + + implicit none + + type(layer) :: test_layer, input_layer + type(network) :: net + real, allocatable :: gradient(:,:) + real, allocatable :: output(:) + logical :: ok = .true. + + test_layer = flatten2d() + + if (.not. test_layer % name == 'flatten2d') then + ok = .false. + write(stderr, '(a)') 'flatten2d layer has its name set correctly.. failed' + end if + + if (test_layer % initialized) then + ok = .false. + write(stderr, '(a)') 'flatten2d layer is not initialized yet.. failed' + end if + + input_layer = input(1, 2) + call test_layer % init(input_layer) + + if (.not. test_layer % initialized) then + ok = .false. + write(stderr, '(a)') 'flatten2d layer is now initialized.. failed' + end if + + if (.not. all(test_layer % layer_shape == [2])) then + ok = .false. + write(stderr, '(a)') 'flatten2d layer has an incorrect output shape.. failed' + end if + + ! Test forward pass - reshaping from 2-d to 1-d + + select type(this_layer => input_layer % p); type is(input2d_layer) + call this_layer % set(reshape(real([1, 2, 3, 4]), [2, 2])) + end select + + call test_layer % forward(input_layer) + call test_layer % get_output(output) + + if (.not. all(output == [1, 2, 3, 4])) then + ok = .false. + write(stderr, '(a)') 'flatten2d layer correctly propagates forward.. failed' + end if + + ! Test backward pass - reshaping from 1-d to 2-d + + ! Calling backward() will set the values on the gradient component + ! input_layer is used only to determine shape + call test_layer % backward(input_layer, real([1, 2, 3, 4])) + + select type(this_layer => test_layer % p); type is(flatten2d_layer) + gradient = this_layer % gradient + end select + + if (.not. all(gradient == reshape(real([1, 2, 3, 4]), [2, 2]))) then + ok = .false. + write(stderr, '(a)') 'flatten2d layer correctly propagates backward.. failed' + end if + + net = network([ & + input(28, 28), & + flatten2d(), & + dense(10) & + ]) + + ! Test that the output layer receives 784 elements in the input + if (.not. all(net % layers(3) % input_layer_shape == [784])) then + ok = .false. + write(stderr, '(a)') 'flatten2d layer correctly chains input2d to dense.. failed' + end if + + if (ok) then + print '(a)', 'test_flatten2d_layer: All tests passed.' 
+ else + write(stderr, '(a)') 'test_flatten2d_layer: One or more tests failed.' + stop 1 + end if + +end program test_flatten2d_layer From d01a174834a4a2686d500b31fcf71e430d93dc90 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 16 Feb 2025 16:02:44 +0400 Subject: [PATCH 29/44] linear2d_layer: make linear2d layer work with input2d and flatten2d --- src/nf.f90 | 2 +- src/nf/nf_layer_submodule.f90 | 42 +++++++++++++++++++++++++++------ src/nf/nf_network_submodule.f90 | 5 +++- 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/src/nf.f90 b/src/nf.f90 index e9b027c1..d215eb85 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -3,7 +3,7 @@ module nf use nf_datasets_mnist, only: label_digits, load_mnist use nf_layer, only: layer use nf_layer_constructors, only: & - conv2d, dense, flatten, input, maxpool2d, reshape, linear2d + conv2d, dense, flatten, flatten2d, input, maxpool2d, reshape, linear2d use nf_loss, only: mse, quadratic use nf_metrics, only: corr, maxabs use nf_network, only: network diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index d13362ab..bb37a965 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -4,6 +4,7 @@ use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer use nf_flatten_layer, only: flatten_layer + use nf_flatten2d_layer, only: flatten2d_layer use nf_input1d_layer, only: input1d_layer use nf_input2d_layer, only: input2d_layer use nf_input3d_layer, only: input3d_layer @@ -48,8 +49,16 @@ pure module subroutine backward_1d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) type is(maxpool2d_layer) call this_layer % backward(prev_layer % output, gradient) -! type is(linear2d_layer) -! call this_layer % backward(prev_layer % output, gradient) + end select + + type is(flatten2d_layer) + + ! Upstream layers permitted: linear2d_layer + select type(prev_layer => previous % p) + type is(linear2d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(input2d_layer) + call this_layer % backward(prev_layer % output, gradient) end select end select @@ -63,8 +72,6 @@ pure module subroutine backward_2d(self, previous, gradient) class(layer), intent(in) :: previous real, intent(in) :: gradient(:,:) - ! Backward pass from a 2-d layer downstream currently implemented - ! only for input2d and linear2d layers select type(this_layer => self % p) type is(linear2d_layer) @@ -197,8 +204,14 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) type is(reshape3d_layer) call this_layer % forward(prev_layer % output) -! type is(linear2d_layer) -! call this_layer % forward(prev_layer % output) + end select + + type is(flatten2d_layer) + select type(prev_layer => input % p) + type is(linear2d_layer) + call this_layer % forward(prev_layer % output) + type is(input2d_layer) + call this_layer % forward(prev_layer % output) end select type is(reshape3d_layer) @@ -241,6 +254,8 @@ pure module subroutine get_output_1d(self, output) allocate(output, source=this_layer % output) type is(flatten_layer) allocate(output, source=this_layer % output) + type is(flatten2d_layer) + allocate(output, source=this_layer % output) class default error stop '1-d output can only be read from an input1d, dense, or flatten layer.' 
@@ -312,9 +327,11 @@ impure elemental module subroutine init(self, input) self % layer_shape = shape(this_layer % output) type is(flatten_layer) self % layer_shape = shape(this_layer % output) + type is(flatten2d_layer) + self % layer_shape = shape(this_layer % output) end select - self % input_layer_shape = input % layer_shape + self % input_layer_shape = input % layer_shape self % initialized = .true. end subroutine init @@ -355,6 +372,8 @@ elemental module function get_num_params(self) result(num_params) num_params = 0 type is (flatten_layer) num_params = 0 + type is (flatten2d_layer) + num_params = 0 type is (reshape3d_layer) num_params = 0 type is (linear2d_layer) @@ -384,6 +403,8 @@ module function get_params(self) result(params) ! No parameters to get. type is (flatten_layer) ! No parameters to get. + type is (flatten2d_layer) + ! No parameters to get. type is (reshape3d_layer) ! No parameters to get. type is (linear2d_layer) @@ -412,6 +433,8 @@ module function get_gradients(self) result(gradients) type is (maxpool2d_layer) ! No gradients to get. type is (flatten_layer) + ! No parameters to get. + type is (flatten2d_layer) ! No gradients to get. type is (reshape3d_layer) ! No gradients to get. @@ -477,6 +500,11 @@ module subroutine set_params(self, params) write(stderr, '(a)') 'Warning: calling set_params() ' & // 'on a zero-parameter layer; nothing to do.' + type is (flatten2d_layer) + ! No parameters to set. + write(stderr, '(a)') 'Warning: calling set_params() ' & + // 'on a zero-parameter layer; nothing to do.' + type is (reshape3d_layer) ! No parameters to set. write(stderr, '(a)') 'Warning: calling set_params() ' & diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 57244046..c2a9c903 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -151,7 +151,6 @@ module subroutine backward(self, output, loss) else call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient_3d) end if - type is(maxpool2d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) @@ -283,6 +282,10 @@ module function predict_2d(self, input) result(res) select type(output_layer => self % layers(num_layers) % p) type is(dense_layer) res = output_layer % output + type is(flatten_layer) + res = output_layer % output + class default + error stop 'network % output not implemented for this output layer' end select end function predict_2d From 141fe57db9227cfe2c2b12cf663011df4d4c2a3f Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 16 Feb 2025 16:05:16 +0400 Subject: [PATCH 30/44] update cmake --- CMakeLists.txt | 2 ++ test/CMakeLists.txt | 1 + 2 files changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index fc2ddfcb..586997fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,8 @@ add_library(neural-fortran src/nf/nf_dense_layer_submodule.f90 src/nf/nf_flatten_layer.f90 src/nf/nf_flatten_layer_submodule.f90 + src/nf/nf_flatten2d_layer.f90 + src/nf/nf_flatten2d_layer_submodule.f90 src/nf/nf_input1d_layer.f90 src/nf/nf_input1d_layer_submodule.f90 src/nf/nf_input2d_layer.f90 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 12236416..b52a3781 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -8,6 +8,7 @@ foreach(execid conv2d_layer maxpool2d_layer flatten_layer + flatten2d_layer insert_flatten reshape_layer dense_network From c4b8fc70a31bdfe102d90c5cd57649a61e07eab6 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 16 Feb 2025 23:06:56 
+0400 Subject: [PATCH 31/44] linear2d_layer: use flatten layer instead of flatten2d --- src/nf/nf_layer_submodule.f90 | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index bb37a965..a5169ea4 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -4,7 +4,6 @@ use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer use nf_flatten_layer, only: flatten_layer - use nf_flatten2d_layer, only: flatten2d_layer use nf_input1d_layer, only: input1d_layer use nf_input2d_layer, only: input2d_layer use nf_input3d_layer, only: input3d_layer @@ -49,16 +48,8 @@ pure module subroutine backward_1d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) type is(maxpool2d_layer) call this_layer % backward(prev_layer % output, gradient) - end select - - type is(flatten2d_layer) - - ! Upstream layers permitted: linear2d_layer - select type(prev_layer => previous % p) type is(linear2d_layer) call this_layer % backward(prev_layer % output, gradient) - type is(input2d_layer) - call this_layer % backward(prev_layer % output, gradient) end select end select @@ -204,14 +195,8 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) type is(reshape3d_layer) call this_layer % forward(prev_layer % output) - end select - - type is(flatten2d_layer) - select type(prev_layer => input % p) type is(linear2d_layer) call this_layer % forward(prev_layer % output) - type is(input2d_layer) - call this_layer % forward(prev_layer % output) end select type is(reshape3d_layer) @@ -254,8 +239,6 @@ pure module subroutine get_output_1d(self, output) allocate(output, source=this_layer % output) type is(flatten_layer) allocate(output, source=this_layer % output) - type is(flatten2d_layer) - allocate(output, source=this_layer % output) class default error stop '1-d output can only be read from an input1d, dense, or flatten layer.' @@ -327,8 +310,6 @@ impure elemental module subroutine init(self, input) self % layer_shape = shape(this_layer % output) type is(flatten_layer) self % layer_shape = shape(this_layer % output) - type is(flatten2d_layer) - self % layer_shape = shape(this_layer % output) end select self % input_layer_shape = input % layer_shape @@ -372,8 +353,6 @@ elemental module function get_num_params(self) result(num_params) num_params = 0 type is (flatten_layer) num_params = 0 - type is (flatten2d_layer) - num_params = 0 type is (reshape3d_layer) num_params = 0 type is (linear2d_layer) @@ -403,8 +382,6 @@ module function get_params(self) result(params) ! No parameters to get. type is (flatten_layer) ! No parameters to get. - type is (flatten2d_layer) - ! No parameters to get. type is (reshape3d_layer) ! No parameters to get. type is (linear2d_layer) @@ -434,8 +411,6 @@ module function get_gradients(self) result(gradients) ! No gradients to get. type is (flatten_layer) ! No parameters to get. - type is (flatten2d_layer) - ! No gradients to get. type is (reshape3d_layer) ! No gradients to get. type is (linear2d_layer) @@ -500,11 +475,6 @@ module subroutine set_params(self, params) write(stderr, '(a)') 'Warning: calling set_params() ' & // 'on a zero-parameter layer; nothing to do.' - type is (flatten2d_layer) - ! No parameters to set. - write(stderr, '(a)') 'Warning: calling set_params() ' & - // 'on a zero-parameter layer; nothing to do.' - type is (reshape3d_layer) ! No parameters to set. 
write(stderr, '(a)') 'Warning: calling set_params() ' & From 54d1bb0e42175fb8a6d52251642dfb20153f1a18 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 16 Feb 2025 23:08:55 +0400 Subject: [PATCH 32/44] linear2d_layer: remove flatten2d layer --- src/nf/nf_flatten2d_layer.f90 | 75 ------------------ src/nf/nf_flatten2d_layer_submodule.f90 | 48 ------------ src/nf/nf_layer_constructors.f90 | 21 +---- src/nf/nf_layer_constructors_submodule.f90 | 7 -- test/test_flatten2d_layer.f90 | 89 ---------------------- 5 files changed, 1 insertion(+), 239 deletions(-) delete mode 100644 src/nf/nf_flatten2d_layer.f90 delete mode 100644 src/nf/nf_flatten2d_layer_submodule.f90 delete mode 100644 test/test_flatten2d_layer.f90 diff --git a/src/nf/nf_flatten2d_layer.f90 b/src/nf/nf_flatten2d_layer.f90 deleted file mode 100644 index e67037f8..00000000 --- a/src/nf/nf_flatten2d_layer.f90 +++ /dev/null @@ -1,75 +0,0 @@ -module nf_flatten2d_layer - - !! This module provides the concrete flatten2d layer type. - !! It is used internally by the layer type. - !! It is not intended to be used directly by the user. - - use nf_base_layer, only: base_layer - - implicit none - - private - public :: flatten2d_layer - - type, extends(base_layer) :: flatten2d_layer - - !! Concrete implementation of a flatten2d (2-d to 1-d) layer. - - integer, allocatable :: input_shape(:) - integer :: output_size - - real, allocatable :: gradient(:,:) - real, allocatable :: output(:) - - contains - - procedure :: backward - procedure :: forward - procedure :: init - - end type flatten2d_layer - - interface flatten2d_layer - elemental module function flatten2d_layer_cons() result(res) - !! This function returns the `flatten2d_layer` instance. - type(flatten2d_layer) :: res - !! `flatten2d_layer` instance - end function flatten2d_layer_cons - end interface flatten2d_layer - - interface - - pure module subroutine backward(self, input, gradient) - !! Apply the backward pass to the flatten2d layer. - !! This is a reshape operation from 1-d gradient to 2-d input. - class(flatten2d_layer), intent(in out) :: self - !! flatten2d layer instance - real, intent(in) :: input(:,:) - !! Input from the previous layer - real, intent(in) :: gradient(:) - !! Gradient from the next layer - end subroutine backward - - pure module subroutine forward(self, input) - !! Propagate forward the layer. - !! Calling this subroutine updates the values of a few data components - !! of `flatten2d_layer` that are needed for the backward pass. - class(flatten2d_layer), intent(in out) :: self - !! Dense layer instance - real, intent(in) :: input(:,:) - !! Input from the previous layer - end subroutine forward - - module subroutine init(self, input_shape) - !! Initialize the layer data structures. - !! - !! This is a deferred procedure from the `base_layer` abstract type. - class(flatten2d_layer), intent(in out) :: self - !! Dense layer instance - integer, intent(in) :: input_shape(:) - !! Shape of the input layer - end subroutine init - - end interface - -end module nf_flatten2d_layer diff --git a/src/nf/nf_flatten2d_layer_submodule.f90 b/src/nf/nf_flatten2d_layer_submodule.f90 deleted file mode 100644 index 875b7374..00000000 --- a/src/nf/nf_flatten2d_layer_submodule.f90 +++ /dev/null @@ -1,48 +0,0 @@ -submodule(nf_flatten2d_layer) nf_flatten2d_layer_submodule - - !! This module provides the concrete flatten2d layer type. - !! It is used internally by the layer type. - !! It is not intended to be used directly by the user. 
- - use nf_base_layer, only: base_layer - - implicit none - -contains - - elemental module function flatten2d_layer_cons() result(res) - type(flatten2d_layer) :: res - end function flatten2d_layer_cons - - - pure module subroutine backward(self, input, gradient) - class(flatten2d_layer), intent(in out) :: self - real, intent(in) :: input(:,:) - real, intent(in) :: gradient(:) - self % gradient = reshape(gradient, shape(input)) - end subroutine backward - - - pure module subroutine forward(self, input) - class(flatten2d_layer), intent(in out) :: self - real, intent(in) :: input(:,:) - self % output = pack(input, .true.) - end subroutine forward - - - module subroutine init(self, input_shape) - class(flatten2d_layer), intent(in out) :: self - integer, intent(in) :: input_shape(:) - - self % input_shape = input_shape - self % output_size = product(input_shape) - - allocate(self % gradient(input_shape(1), input_shape(2))) - self % gradient = 0 - - allocate(self % output(self % output_size)) - self % output = 0 - - end subroutine init - -end submodule nf_flatten2d_layer_submodule diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index cc7bfe49..ea923442 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -8,7 +8,7 @@ module nf_layer_constructors implicit none private - public :: conv2d, dense, flatten, flatten2d, input, maxpool2d, reshape, linear2d + public :: conv2d, dense, flatten, input, maxpool2d, reshape, linear2d interface input @@ -125,25 +125,6 @@ module function flatten() result(res) !! Resulting layer instance end function flatten - module function flatten2d() result(res) - !! Flatten (2-d -> 1-d) layer constructor. - !! - !! Use this layer to chain layers with 2-d outputs to layers with 2-d - !! inputs. - !! - !! A flatten layer must not be the first layer in the network. - !! - !! Example: - !! - !! ``` - !! use nf, only :: flatten, layer - !! type(layer) :: flatten_layer - !! flatten_layer = flatten() - !! ``` - type(layer) :: res - !! Resulting layer instance - end function flatten2d - module function conv2d(filters, kernel_size, activation) result(res) !! 2-d convolutional layer constructor. !! 
diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index ec42a1a6..8809db86 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -4,7 +4,6 @@ use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer use nf_flatten_layer, only: flatten_layer - use nf_flatten2d_layer, only: flatten2d_layer use nf_input1d_layer, only: input1d_layer use nf_input2d_layer, only: input2d_layer use nf_input3d_layer, only: input3d_layer @@ -73,12 +72,6 @@ module function flatten() result(res) end function flatten - module function flatten2d() result(res) - type(layer) :: res - res % name = 'flatten2d' - allocate(res % p, source=flatten2d_layer()) - end function flatten2d - module function input1d(layer_size) result(res) integer, intent(in) :: layer_size diff --git a/test/test_flatten2d_layer.f90 b/test/test_flatten2d_layer.f90 deleted file mode 100644 index 3189b4e9..00000000 --- a/test/test_flatten2d_layer.f90 +++ /dev/null @@ -1,89 +0,0 @@ -program test_flatten2d_layer - - use iso_fortran_env, only: stderr => error_unit - use nf, only: dense, flatten2d, input, layer, network - use nf_flatten2d_layer, only: flatten2d_layer - use nf_input2d_layer, only: input2d_layer - - implicit none - - type(layer) :: test_layer, input_layer - type(network) :: net - real, allocatable :: gradient(:,:) - real, allocatable :: output(:) - logical :: ok = .true. - - test_layer = flatten2d() - - if (.not. test_layer % name == 'flatten2d') then - ok = .false. - write(stderr, '(a)') 'flatten2d layer has its name set correctly.. failed' - end if - - if (test_layer % initialized) then - ok = .false. - write(stderr, '(a)') 'flatten2d layer is not initialized yet.. failed' - end if - - input_layer = input(1, 2) - call test_layer % init(input_layer) - - if (.not. test_layer % initialized) then - ok = .false. - write(stderr, '(a)') 'flatten2d layer is now initialized.. failed' - end if - - if (.not. all(test_layer % layer_shape == [2])) then - ok = .false. - write(stderr, '(a)') 'flatten2d layer has an incorrect output shape.. failed' - end if - - ! Test forward pass - reshaping from 2-d to 1-d - - select type(this_layer => input_layer % p); type is(input2d_layer) - call this_layer % set(reshape(real([1, 2, 3, 4]), [2, 2])) - end select - - call test_layer % forward(input_layer) - call test_layer % get_output(output) - - if (.not. all(output == [1, 2, 3, 4])) then - ok = .false. - write(stderr, '(a)') 'flatten2d layer correctly propagates forward.. failed' - end if - - ! Test backward pass - reshaping from 1-d to 2-d - - ! Calling backward() will set the values on the gradient component - ! input_layer is used only to determine shape - call test_layer % backward(input_layer, real([1, 2, 3, 4])) - - select type(this_layer => test_layer % p); type is(flatten2d_layer) - gradient = this_layer % gradient - end select - - if (.not. all(gradient == reshape(real([1, 2, 3, 4]), [2, 2]))) then - ok = .false. - write(stderr, '(a)') 'flatten2d layer correctly propagates backward.. failed' - end if - - net = network([ & - input(28, 28), & - flatten2d(), & - dense(10) & - ]) - - ! Test that the output layer receives 784 elements in the input - if (.not. all(net % layers(3) % input_layer_shape == [784])) then - ok = .false. - write(stderr, '(a)') 'flatten2d layer correctly chains input2d to dense.. failed' - end if - - if (ok) then - print '(a)', 'test_flatten2d_layer: All tests passed.' 
- else - write(stderr, '(a)') 'test_flatten2d_layer: One or more tests failed.' - stop 1 - end if - -end program test_flatten2d_layer From 9a4422fba4f4bae57ec83da6dba5ad5b8ea2437f Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 16 Feb 2025 23:14:09 +0400 Subject: [PATCH 33/44] linear2d_layer: remove public api --- src/nf.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nf.f90 b/src/nf.f90 index d215eb85..e9b027c1 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -3,7 +3,7 @@ module nf use nf_datasets_mnist, only: label_digits, load_mnist use nf_layer, only: layer use nf_layer_constructors, only: & - conv2d, dense, flatten, flatten2d, input, maxpool2d, reshape, linear2d + conv2d, dense, flatten, input, maxpool2d, reshape, linear2d use nf_loss, only: mse, quadratic use nf_metrics, only: corr, maxabs use nf_network, only: network From 7606d2c1c01c6293f8b77bf2ba2e398b726cdd48 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 16 Feb 2025 23:31:47 +0400 Subject: [PATCH 34/44] linear2d_layer: update cmakelists --- CMakeLists.txt | 2 -- test/CMakeLists.txt | 1 - 2 files changed, 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 586997fd..fc2ddfcb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,8 +28,6 @@ add_library(neural-fortran src/nf/nf_dense_layer_submodule.f90 src/nf/nf_flatten_layer.f90 src/nf/nf_flatten_layer_submodule.f90 - src/nf/nf_flatten2d_layer.f90 - src/nf/nf_flatten2d_layer_submodule.f90 src/nf/nf_input1d_layer.f90 src/nf/nf_input1d_layer_submodule.f90 src/nf/nf_input2d_layer.f90 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b52a3781..12236416 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -8,7 +8,6 @@ foreach(execid conv2d_layer maxpool2d_layer flatten_layer - flatten2d_layer insert_flatten reshape_layer dense_network From 7d271fe5d54d3c2b73f47f0fa2bac13db7f9e9cc Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 16 Feb 2025 23:32:27 +0400 Subject: [PATCH 35/44] linear2d_layer: workaround cpu imprecision to make ci happy --- test/test_linear2d_layer.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index b1f39694..15d7bf2e 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -58,7 +58,7 @@ subroutine test_linear2d_layer_backward(linear, ok, input, gradient) real :: dw_shape(2) real :: db_shape(1) real :: gradient_flat(12) - real :: dw_flat(4) + integer :: dw_flat(4) ! cpu imprecision workaround real :: expected_gradient_shape(2) = [3, 4] real :: expected_dw_shape(2) = [4, 1] real :: expected_db_shape(1) = [1] @@ -67,7 +67,7 @@ subroutine test_linear2d_layer_backward(linear, ok, input, gradient) 0.2, 0.3, 0.2, 0.2,& 0.3, 0.2, 0.2, 0.3& ] - real :: expected_dw_flat(4) = [0.7, 0.7, 1.4, 1.4] + integer :: expected_dw_flat(4) = [7, 7, 14, 14] ! cpu imprecision workaround real :: expected_db(1) = [7] call linear % backward(input, gradient) @@ -93,7 +93,7 @@ subroutine test_linear2d_layer_backward(linear, ok, input, gradient) ok = .false. write(stderr, '(a)') 'backward returned incorrect gradient values.. failed' end if - dw_flat = reshape(linear % dw, shape(dw_flat)) + dw_flat = nint(reshape(linear % dw, shape(dw_flat)) * 10) if (.not. all(dw_flat.eq.expected_dw_flat)) then ok = .false. write(stderr, '(a)') 'backward returned incorrect dw values.. 
failed' From 539fde82de04c8e632e6661019175ce997414a83 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Sun, 16 Feb 2025 22:45:34 -0500 Subject: [PATCH 36/44] Add linear2d example --- example/CMakeLists.txt | 1 + example/linear2d.f90 | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 example/linear2d.f90 diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 28cf71a7..0257dd7d 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -2,6 +2,7 @@ foreach(execid cnn_mnist dense_mnist get_set_network_params + linear2d network_parameters simple sine diff --git a/example/linear2d.f90 b/example/linear2d.f90 new file mode 100644 index 00000000..1b71f5d3 --- /dev/null +++ b/example/linear2d.f90 @@ -0,0 +1,29 @@ +program linear2d_example + + use nf, only: input, network, sgd, linear2d, mse, flatten + implicit none + + type(network) :: net + real :: x(3, 4) = reshape( & + [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2], & + [3, 4]) + real :: y(3) = [0.12, 0.1, 0.3] + integer, parameter :: num_iterations = 500 + integer :: n + + net = network([ & + input(3, 4), & + linear2d(3, 4, 1), & + flatten() & + ]) + + call net % print_info() + + do n = 1, num_iterations + call net % forward(x) + call net % backward(y, mse()) + call net % update(optimizer=sgd(learning_rate=1.)) + print '(i4,3(3x,f8.6))', n, net % predict(x) + end do + +end program linear2d_example \ No newline at end of file From a97f14114178464481fda386bb45c62a7786fe8b Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 17 Feb 2025 12:10:44 +0400 Subject: [PATCH 37/44] linear2d_layer: remove redundant constructor args --- example/linear2d.f90 | 4 ++-- src/nf/nf_layer_constructors.f90 | 4 ++-- src/nf/nf_layer_constructors_submodule.f90 | 6 +++--- src/nf/nf_linear2d_layer.f90 | 6 ++---- src/nf/nf_linear2d_layer_submodule.f90 | 14 ++++++++------ test/test_linear2d_layer.f90 | 10 +++++----- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/example/linear2d.f90 b/example/linear2d.f90 index 1b71f5d3..06c8b255 100644 --- a/example/linear2d.f90 +++ b/example/linear2d.f90 @@ -8,12 +8,12 @@ program linear2d_example [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2], & [3, 4]) real :: y(3) = [0.12, 0.1, 0.3] - integer, parameter :: num_iterations = 500 + integer, parameter :: num_iterations = 5 integer :: n net = network([ & input(3, 4), & - linear2d(3, 4, 1), & + linear2d(3, 1), & flatten() & ]) diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index ea923442..b5ea4dbe 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -185,8 +185,8 @@ module function reshape(output_shape) result(res) !! 
Resulting layer instance end function reshape - module function linear2d(sequence_length, in_features, out_features) result(res) - integer, intent(in) :: sequence_length, in_features, out_features + module function linear2d(sequence_length, out_features) result(res) + integer, intent(in) :: sequence_length, out_features type(layer) :: res end function linear2d diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 8809db86..0724a15e 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -150,13 +150,13 @@ module function reshape(output_shape) result(res) end function reshape - module function linear2d(sequence_length, in_features, out_features) result(res) - integer, intent(in) :: sequence_length, in_features, out_features + module function linear2d(sequence_length, out_features) result(res) + integer, intent(in) :: sequence_length, out_features type(layer) :: res res % name = 'linear2d' res % layer_shape = [sequence_length, out_features] - allocate(res % p, source=linear2d_layer(sequence_length, in_features, out_features)) + allocate(res % p, source=linear2d_layer(out_features)) end function linear2d end submodule nf_layer_constructors_submodule diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index 60c2b0eb..6f9c6a31 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -31,10 +31,8 @@ module nf_linear2d_layer end type linear2d_layer interface linear2d_layer - module function linear2d_layer_cons(& - sequence_length, in_features, out_features& - ) result(res) - integer, intent(in) :: sequence_length, in_features, out_features + module function linear2d_layer_cons(out_features) result(res) + integer, intent(in) :: out_features type(linear2d_layer) :: res end function linear2d_layer_cons end interface linear2d_layer diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90 index 1a513fb8..9df7e58e 100644 --- a/src/nf/nf_linear2d_layer_submodule.f90 +++ b/src/nf/nf_linear2d_layer_submodule.f90 @@ -2,21 +2,23 @@ use nf_base_layer, only: base_layer implicit none contains - module function linear2d_layer_cons(& - sequence_length, in_features, out_features& - ) result(res) - integer, intent(in) :: sequence_length, in_features, out_features + module function linear2d_layer_cons(out_features) result(res) + integer, intent(in) :: out_features type(linear2d_layer) :: res - res % in_features = in_features res % out_features = out_features - res % sequence_length = sequence_length end function linear2d_layer_cons module subroutine init(self, input_shape) class(linear2d_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) + if (size(input_shape) /= 2) then + error stop "Linear2D Layer accepts 2D input" + end if + self % sequence_length = input_shape(1) + self % in_features = input_shape(2) + allocate(self % output(self % sequence_length, self % out_features)) allocate(self % gradient(self % sequence_length, self % in_features)) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index 15d7bf2e..b1f4f55c 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -6,12 +6,12 @@ program test_linear2d_layer logical :: ok = .true. real :: sample_input(3, 4) = reshape(& [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],& - [3, 4]) ! 
first batch are 0.1, second 0.2 + [3, 4]) real :: sample_gradient(3, 1) = reshape([2., 2., 3.], [3, 1]) type(linear2d_layer) :: linear - linear = linear2d_layer(sequence_length=3, in_features=4, out_features=1) - call linear % init([4]) + linear = linear2d_layer(out_features=1) + call linear % init([3, 4]) call test_linear2d_layer_forward(linear, ok, sample_input) call test_linear2d_layer_backward(linear, ok, sample_input, sample_gradient) @@ -131,8 +131,8 @@ subroutine test_linear2d_layer_gradient_updates(ok) integer :: i - linear = linear2d_layer(sequence_length=3, in_features=4, out_features=2, batch_size=1) - call linear % init([4]) + linear = linear2d_layer(out_features=2) + call linear % init([3, 4]) call linear % forward(input) call linear % backward(input, gradient) From bbfaf3cda6b0d65d07612f89c65140f1c2d50a9e Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 17 Feb 2025 13:10:50 +0400 Subject: [PATCH 38/44] linear2d_layer: make example converge --- example/linear2d.f90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/example/linear2d.f90 b/example/linear2d.f90 index 06c8b255..79077723 100644 --- a/example/linear2d.f90 +++ b/example/linear2d.f90 @@ -5,10 +5,10 @@ program linear2d_example type(network) :: net real :: x(3, 4) = reshape( & - [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2], & + [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.11, 0.12, 0.13], & [3, 4]) - real :: y(3) = [0.12, 0.1, 0.3] - integer, parameter :: num_iterations = 5 + real :: y(3) = [0.12, 0.1, 0.2] + integer, parameter :: num_iterations = 9 integer :: n net = network([ & @@ -22,7 +22,7 @@ program linear2d_example do n = 1, num_iterations call net % forward(x) call net % backward(y, mse()) - call net % update(optimizer=sgd(learning_rate=1.)) + call net % update(optimizer=sgd(learning_rate=0.01)) print '(i4,3(3x,f8.6))', n, net % predict(x) end do From 4d28a0a55b12471d2ba8c4087a004741e7399ea9 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 17 Feb 2025 13:47:54 +0400 Subject: [PATCH 39/44] linear2d_layer: make weighs init with normal distribution --- src/nf/nf_linear2d_layer_submodule.f90 | 5 +++-- test/test_linear2d_layer.f90 | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90 index 9df7e58e..effcd43f 100644 --- a/src/nf/nf_linear2d_layer_submodule.f90 +++ b/src/nf/nf_linear2d_layer_submodule.f90 @@ -1,5 +1,6 @@ submodule(nf_linear2d_layer) nf_linear2d_layer_submodule use nf_base_layer, only: base_layer + use nf_random, only: random_normal implicit none contains module function linear2d_layer_cons(out_features) result(res) @@ -23,10 +24,10 @@ module subroutine init(self, input_shape) allocate(self % gradient(self % sequence_length, self % in_features)) allocate(self % weights(self % in_features, self % out_features)) - self % weights = 0.1 + call random_normal(self % weights) allocate(self % biases(self % out_features)) - self%biases = 0.11 + call random_normal(self % biases) allocate(self % dw(self % in_features, self % out_features)) self % dw = 0.0 diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index b1f4f55c..c345fddb 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -12,6 +12,8 @@ program test_linear2d_layer linear = linear2d_layer(out_features=1) call linear % init([3, 4]) + linear % weights = 0.1 + linear % biases = 0.11 call test_linear2d_layer_forward(linear, ok, sample_input) call 
test_linear2d_layer_backward(linear, ok, sample_input, sample_gradient) From bfc69d5d77984ccdcbcd7d9a42582bdc2ee6ab32 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 17 Feb 2025 13:48:36 +0400 Subject: [PATCH 40/44] linear2d_layer: add loss stopping and more iterations --- example/linear2d.f90 | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/example/linear2d.f90 b/example/linear2d.f90 index 79077723..980d45e4 100644 --- a/example/linear2d.f90 +++ b/example/linear2d.f90 @@ -4,11 +4,14 @@ program linear2d_example implicit none type(network) :: net + type(mse) :: loss real :: x(3, 4) = reshape( & [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.11, 0.12, 0.13], & [3, 4]) real :: y(3) = [0.12, 0.1, 0.2] - integer, parameter :: num_iterations = 9 + real :: preds(3) + real :: loss_value + integer, parameter :: num_iterations = 500 integer :: n net = network([ & @@ -18,12 +21,19 @@ program linear2d_example ]) call net % print_info() + loss = mse() do n = 1, num_iterations call net % forward(x) - call net % backward(y, mse()) + call net % backward(y, loss) call net % update(optimizer=sgd(learning_rate=0.01)) - print '(i4,3(3x,f8.6))', n, net % predict(x) + preds = net % predict(x) + print '(i4,3(3x,f8.6))', n, preds + loss_value = loss % eval (y, preds) + if (loss_value < 0.01) then + print *, 'Loss: ', loss_value + return + end if end do end program linear2d_example \ No newline at end of file From 119a6c84b1afc16f89c00d3099471957573ac986 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 17 Feb 2025 14:08:20 +0400 Subject: [PATCH 41/44] linear2d_layer: update tests --- test/test_linear2d_layer.f90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index c345fddb..28b99bf0 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -135,6 +135,8 @@ subroutine test_linear2d_layer_gradient_updates(ok) linear = linear2d_layer(out_features=2) call linear % init([3, 4]) + linear % weights = 0.1 + linear % biases = 0.11 call linear % forward(input) call linear % backward(input, gradient) From 6f33ebe8a4ba4d68e631667f43be3f62b0511c75 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Mon, 17 Feb 2025 10:56:00 -0500 Subject: [PATCH 42/44] Tidy up --- example/linear2d.f90 | 13 ++++++---- src/nf/nf_linear2d_layer_submodule.f90 | 33 ++++++++++++++++---------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/example/linear2d.f90 b/example/linear2d.f90 index 980d45e4..f06b633c 100644 --- a/example/linear2d.f90 +++ b/example/linear2d.f90 @@ -4,14 +4,14 @@ program linear2d_example implicit none type(network) :: net - type(mse) :: loss + type(mse) :: loss = mse() real :: x(3, 4) = reshape( & [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.11, 0.12, 0.13], & [3, 4]) real :: y(3) = [0.12, 0.1, 0.2] real :: preds(3) real :: loss_value - integer, parameter :: num_iterations = 500 + integer, parameter :: num_iterations = 10000 integer :: n net = network([ & @@ -21,19 +21,22 @@ program linear2d_example ]) call net % print_info() - loss = mse() do n = 1, num_iterations + call net % forward(x) call net % backward(y, loss) call net % update(optimizer=sgd(learning_rate=0.01)) + preds = net % predict(x) - print '(i4,3(3x,f8.6))', n, preds + print '(i5,3(3x,f9.6))', n, preds + loss_value = loss % eval (y, preds) - if (loss_value < 0.01) then + if (loss_value < 1e-4) then print *, 'Loss: ', loss_value return end if + end do end program linear2d_example \ No newline at end of file 
diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90 index effcd43f..eaa93411 100644 --- a/src/nf/nf_linear2d_layer_submodule.f90 +++ b/src/nf/nf_linear2d_layer_submodule.f90 @@ -2,7 +2,9 @@ use nf_base_layer, only: base_layer use nf_random, only: random_normal implicit none + contains + module function linear2d_layer_cons(out_features) result(res) integer, intent(in) :: out_features type(linear2d_layer) :: res @@ -10,12 +12,13 @@ module function linear2d_layer_cons(out_features) result(res) res % out_features = out_features end function linear2d_layer_cons + module subroutine init(self, input_shape) class(linear2d_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) if (size(input_shape) /= 2) then - error stop "Linear2D Layer accepts 2D input" + error stop "linear2d layer requires 2D input." end if self % sequence_length = input_shape(1) self % in_features = input_shape(2) @@ -30,40 +33,45 @@ module subroutine init(self, input_shape) call random_normal(self % biases) allocate(self % dw(self % in_features, self % out_features)) - self % dw = 0.0 + self % dw = 0 allocate(self % db(self % out_features)) - self % db = 0.0 + self % db = 0 + end subroutine init + pure module subroutine forward(self, input) class(linear2d_layer), intent(in out) :: self real, intent(in) :: input(:, :) integer :: i - self % output(:, :) = matmul(input(:, :), self % weights) - do concurrent(i = 1: self % sequence_length) - self % output(i, :) = self % output(i, :) + self % biases + self % output(:,:) = matmul(input(:,:), self % weights) + do concurrent(i = 1:self % sequence_length) + self % output(i,:) = self % output(i,:) + self % biases end do + end subroutine forward + pure module subroutine backward(self, input, gradient) class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :) - real, intent(in) :: gradient(:, :) + real, intent(in) :: input(:,:) + real, intent(in) :: gradient(:,:) real :: db(self % out_features) real :: dw(self % in_features, self % out_features) integer :: i - self % dw = self % dw + matmul(transpose(input(:, :)), gradient(:, :)) - self % db = self % db + sum(gradient(:, :), 1) - self % gradient(:, :) = matmul(gradient(:, :), transpose(self % weights)) + self % dw = self % dw + matmul(transpose(input(:,:)), gradient(:,:)) + self % db = self % db + sum(gradient(:,:), 1) + self % gradient(:,:) = matmul(gradient(:,:), transpose(self % weights)) end subroutine backward + pure module function get_num_params(self) result(num_params) class(linear2d_layer), intent(in) :: self integer :: num_params - ! Number of weigths times number of biases + ! 
Number of weights times number of biases num_params = self % in_features * self % out_features + self % out_features end function get_num_params @@ -122,4 +130,5 @@ module subroutine set_params(self, params) end associate end subroutine set_params + end submodule nf_linear2d_layer_submodule \ No newline at end of file From 678b2c09e795ae71740e005aa97d60110939405d Mon Sep 17 00:00:00 2001 From: milancurcic Date: Mon, 17 Feb 2025 11:19:06 -0500 Subject: [PATCH 43/44] Require passing only out_features to linear2d(); tidy up --- example/linear2d.f90 | 2 +- src/nf/nf_layer_constructors.f90 | 9 +++++++-- src/nf/nf_layer_constructors_submodule.f90 | 7 ++++--- src/nf/nf_layer_submodule.f90 | 4 +++- src/nf/nf_linear2d_layer.f90 | 14 +++++++------- src/nf/nf_linear2d_layer_submodule.f90 | 2 ++ 6 files changed, 24 insertions(+), 14 deletions(-) diff --git a/example/linear2d.f90 b/example/linear2d.f90 index f06b633c..5a31fcf3 100644 --- a/example/linear2d.f90 +++ b/example/linear2d.f90 @@ -16,7 +16,7 @@ program linear2d_example net = network([ & input(3, 4), & - linear2d(3, 1), & + linear2d(1), & flatten() & ]) diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index b5ea4dbe..2983ddcd 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -185,9 +185,14 @@ module function reshape(output_shape) result(res) !! Resulting layer instance end function reshape - module function linear2d(sequence_length, out_features) result(res) - integer, intent(in) :: sequence_length, out_features + module function linear2d(out_features) result(res) + !! Rank-2 (sequence_length, out_features) linear layer constructor. + !! sequence_length is determined at layer initialization, based on the + !! output shape of the previous layer. + integer, intent(in) :: out_features + !! Number of output features type(layer) :: res + !! Resulting layer instance end function linear2d end interface diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 0724a15e..ae7d05dc 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -150,13 +150,14 @@ module function reshape(output_shape) result(res) end function reshape - module function linear2d(sequence_length, out_features) result(res) - integer, intent(in) :: sequence_length, out_features + + module function linear2d(out_features) result(res) + integer, intent(in) :: out_features type(layer) :: res res % name = 'linear2d' - res % layer_shape = [sequence_length, out_features] allocate(res % p, source=linear2d_layer(out_features)) + end function linear2d end submodule nf_layer_constructors_submodule diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index a5169ea4..22eabe9e 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -301,7 +301,7 @@ impure elemental module subroutine init(self, input) call this_layer % init(input % layer_shape) end select - ! The shape of conv2d, maxpool2d, or flatten layers is not known + ! The shape of linear2d, conv2d, maxpool2d, or flatten layers is not known ! until we receive an input layer. 
select type(this_layer => self % p) type is(conv2d_layer) @@ -310,6 +310,8 @@ impure elemental module subroutine init(self, input) self % layer_shape = shape(this_layer % output) type is(flatten_layer) self % layer_shape = shape(this_layer % output) + type is(linear2d_layer) + self % layer_shape = shape(this_layer % output) end select self % input_layer_shape = input % layer_shape diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index 6f9c6a31..f785a14c 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -11,11 +11,11 @@ module nf_linear2d_layer type, extends(base_layer) :: linear2d_layer integer :: sequence_length, in_features, out_features, batch_size - real, allocatable :: weights(:, :) + real, allocatable :: weights(:,:) real, allocatable :: biases(:) - real, allocatable :: output(:, :) - real, allocatable :: gradient(:, :) ! input gradient - real, allocatable :: dw(:, :) ! weight gradients + real, allocatable :: output(:,:) + real, allocatable :: gradient(:,:) ! input gradient + real, allocatable :: dw(:,:) ! weight gradients real, allocatable :: db(:) ! bias gradients contains @@ -40,13 +40,13 @@ end function linear2d_layer_cons interface pure module subroutine forward(self, input) class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :) + real, intent(in) :: input(:,:) end subroutine forward pure module subroutine backward(self, input, gradient) class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :) - real, intent(in) :: gradient(:, :) + real, intent(in) :: input(:,:) + real, intent(in) :: gradient(:,:) end subroutine backward module subroutine init(self, input_shape) diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90 index eaa93411..0dfe7e27 100644 --- a/src/nf/nf_linear2d_layer_submodule.f90 +++ b/src/nf/nf_linear2d_layer_submodule.f90 @@ -10,6 +10,7 @@ module function linear2d_layer_cons(out_features) result(res) type(linear2d_layer) :: res res % out_features = out_features + end function linear2d_layer_cons @@ -34,6 +35,7 @@ module subroutine init(self, input_shape) allocate(self % dw(self % in_features, self % out_features)) self % dw = 0 + allocate(self % db(self % out_features)) self % db = 0 From e78ef62b3fb83201828671af00afed808be23f33 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Mon, 17 Feb 2025 13:30:23 -0500 Subject: [PATCH 44/44] Remove linear2d example --- example/CMakeLists.txt | 1 - example/linear2d.f90 | 42 ------------------------------------------ 2 files changed, 43 deletions(-) delete mode 100644 example/linear2d.f90 diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 0257dd7d..28cf71a7 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -2,7 +2,6 @@ foreach(execid cnn_mnist dense_mnist get_set_network_params - linear2d network_parameters simple sine diff --git a/example/linear2d.f90 b/example/linear2d.f90 deleted file mode 100644 index 5a31fcf3..00000000 --- a/example/linear2d.f90 +++ /dev/null @@ -1,42 +0,0 @@ -program linear2d_example - - use nf, only: input, network, sgd, linear2d, mse, flatten - implicit none - - type(network) :: net - type(mse) :: loss = mse() - real :: x(3, 4) = reshape( & - [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.11, 0.12, 0.13], & - [3, 4]) - real :: y(3) = [0.12, 0.1, 0.2] - real :: preds(3) - real :: loss_value - integer, parameter :: num_iterations = 10000 - integer :: n - - net = network([ & - input(3, 4), & - linear2d(1), & - flatten() & - ]) - - 
call net % print_info() - - do n = 1, num_iterations - - call net % forward(x) - call net % backward(y, loss) - call net % update(optimizer=sgd(learning_rate=0.01)) - - preds = net % predict(x) - print '(i5,3(3x,f9.6))', n, preds - - loss_value = loss % eval (y, preds) - if (loss_value < 1e-4) then - print *, 'Loss: ', loss_value - return - end if - - end do - -end program linear2d_example \ No newline at end of file
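
Usage note (not part of the patch series): PATCH 44 removes example/linear2d.f90, so the applied series ships no demo of the finished layer. The sketch below is a minimal reconstruction adapted from that deleted example and from the out_features-only linear2d() constructor introduced in PATCH 43; the program name, the fixed 1000-iteration cap, and the exit-based stopping are illustrative assumptions, not code carried by any patch.

program linear2d_usage_sketch
  ! Minimal training-loop sketch mirroring the removed example/linear2d.f90.
  use nf, only: input, network, sgd, linear2d, mse, flatten
  implicit none

  type(network) :: net
  type(mse) :: loss = mse()
  ! Sequence length 3 with 4 input features, same toy data as the deleted example.
  real :: x(3, 4) = reshape( &
    [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.11, 0.12, 0.13], &
    [3, 4])
  real :: y(3) = [0.12, 0.1, 0.2]
  real :: preds(3)
  integer :: n

  ! linear2d now takes only out_features; sequence_length (3) and
  ! in_features (4) are inferred from the preceding input(3, 4) layer.
  net = network([ &
    input(3, 4), &
    linear2d(1), &
    flatten() &
  ])

  call net % print_info()

  do n = 1, 1000
    call net % forward(x)
    call net % backward(y, loss)
    call net % update(optimizer=sgd(learning_rate=0.01))
    preds = net % predict(x)
    ! Stop as soon as the fit is good enough (illustrative threshold).
    if (loss % eval(y, preds) < 1e-4) exit
  end do

  print '(i5,3(3x,f9.6))', n, preds

end program linear2d_usage_sketch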