From 1fb279aff00bc128f93d7ea6bef89c8499a0fc07 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 2 Feb 2025 13:36:04 +0400 Subject: [PATCH 01/44] linear2d_layer forward implementation --- src/nf.f90 | 1 + src/nf/nf_linear2d_layer.f90 | 97 ++++++++++++++++++++++++++++++++++++ test/test_linear2d_layer.f90 | 40 +++++++++++++++ 3 files changed, 138 insertions(+) create mode 100644 src/nf/nf_linear2d_layer.f90 create mode 100644 test/test_linear2d_layer.f90 diff --git a/src/nf.f90 b/src/nf.f90 index b97d9e62..5a5fb3c3 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -12,4 +12,5 @@ module nf gaussian, linear, relu, leaky_relu, & sigmoid, softmax, softplus, step, tanhf, & celu + use nf_linear2d_layer, only: linear2d_layer end module nf diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 new file mode 100644 index 00000000..4bfbed30 --- /dev/null +++ b/src/nf/nf_linear2d_layer.f90 @@ -0,0 +1,97 @@ +module nf_linear2d_layer + + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + + implicit none + + private + public :: linear2d_layer + + type, extends(base_layer) :: linear2d_layer + integer :: batch_size, sequence_length, in_features, out_features + + real, allocatable :: weights(:, :) + real, allocatable :: biases(:) + real, allocatable :: output(:, :, :) + real, allocatable :: gradient(:, :, :) ! input gradient + real, allocatable :: dw(:, :) ! weight gradients + real, allocatable :: db(:) ! bias gradients + + contains + +! procedure :: backward + procedure :: forward + procedure :: init + + end type linear2d_layer + + interface linear2d_layer + module function linear2d_layer_cons(in_features, out_features) & + result(res) + integer, intent(in) :: in_features, out_features + type(linear2d_layer) :: res + end function linear2d_layer_cons + end interface linear2d_layer + + interface + pure module subroutine forward(self, input) + class(linear2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :, :) + end subroutine forward + + module subroutine init(self, input_shape) + class(linear2d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + end subroutine init + end interface + +contains + module function linear2d_layer_cons(& + batch_size, sequence_length, in_features, out_features& + ) result(res) + integer, intent(in) :: batch_size, sequence_length, in_features, out_features + type(linear2d_layer) :: res + + res % in_features = in_features + res % out_features = out_features + res % sequence_length = sequence_length + res % batch_size = batch_size + + call res % init([1]) + end function linear2d_layer_cons + + module subroutine init(self, input_shape) + class(linear2d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + integer i, j + + allocate(self % output(self % batch_size, self % sequence_length, self % out_features)) + allocate(self % gradient(self % batch_size, self % sequence_length, self % in_features)) + + allocate(self%weights(self%in_features, self%out_features)) + do i = 1, self%in_features + do j = 1, self%out_features + self%weights(i, j) = 0.1 + end do + end do + + allocate(self%biases(self%out_features)) + do i = 1, self%out_features + self%biases(i) = 0.11 + end do + end subroutine init + + pure module subroutine forward(self, input) + class(linear2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :, :) + integer :: i, j + + do i = 1, self % batch_size + self % output(i, :, :) = matmul(input(i, :, :), self % weights) + do j = 1, self % sequence_length + self % 
output(i, j, :) = self % output(i, j, :) + self % biases + end do + end do + end subroutine forward +end module nf_linear2d_layer diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 new file mode 100644 index 00000000..6f14fc8c --- /dev/null +++ b/test/test_linear2d_layer.f90 @@ -0,0 +1,40 @@ +program test_linear2d_layer + use iso_fortran_env, only: stderr => error_unit + use nf_linear2d_layer, only: linear2d_layer + implicit none + + logical :: ok = .true. + real :: sample_input(2, 3, 4) = reshape(& + [0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2,& + 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2],& + [2, 3, 4]) ! first batch are 0.1, second 0.2 + type(linear2d_layer) :: linear + + linear = linear2d_layer(batch_size=2, sequence_length=3, in_features=4, out_features=1) + + call test_linear2d_layer_forward(linear, ok, sample_input) + +contains + subroutine test_linear2d_layer_forward(linear, ok, input) + type(linear2d_layer), intent(in out) :: linear + logical, intent(in out) :: ok + real, intent(in) :: input(2, 3, 4) + real :: output_shape(3) + real :: output_flat(6) + real :: expected_shape(3) = [2, 3, 1] + real :: expected_output_flat(6) = [0.15, 0.19, 0.15, 0.19, 0.15, 0.19] + + call linear % forward(input) + + output_shape = shape(linear % output) + if (.not. all(output_shape.eq.expected_shape)) then + ok = .false. + write(stderr, '(a)') 'forward returned incorrect shape.. failed' + end if + output_flat = reshape(linear % output, shape(output_flat)) + if (.not. all(output_flat.eq.expected_output_flat)) then + ok = .false. + write(stderr, '(a)') 'forward returned incorrect values.. failed' + end if + end subroutine test_linear2d_layer_forward +end program test_linear2d_layer \ No newline at end of file From d997b6bc602ce3cdc4eee540571cff6994abe7e1 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 2 Feb 2025 15:29:06 +0400 Subject: [PATCH 02/44] implement backward --- src/nf/nf_linear2d_layer.f90 | 23 ++++++++++++++- test/test_linear2d_layer.f90 | 56 ++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index 4bfbed30..833ee17e 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -20,7 +20,7 @@ module nf_linear2d_layer contains -! 
procedure :: backward + procedure :: backward procedure :: forward procedure :: init @@ -80,6 +80,11 @@ module subroutine init(self, input_shape) do i = 1, self%out_features self%biases(i) = 0.11 end do + + allocate(self % dw(self % in_features, self % out_features)) + self % dw = 0.0 + allocate(self % db(self % out_features)) + self % db = 0.0 end subroutine init pure module subroutine forward(self, input) @@ -94,4 +99,20 @@ pure module subroutine forward(self, input) end do end do end subroutine forward + + pure module subroutine backward(self, input, gradient) + class(linear2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :, :) + real, intent(in) :: gradient(:, :, :) + real :: db(self % out_features) + real :: dw(self % in_features, self % out_features) + integer :: i + + do i = 1, self % batch_size + self % dw = self % dw + matmul(transpose(input(i, :, :)), gradient(i, :, :)) + self % db = self % db + sum(gradient(i, :, :), 1) + self % gradient(i, :, :) = matmul(gradient(i, :, :), transpose(self % weights)) + end do + + end subroutine backward end module nf_linear2d_layer diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index 6f14fc8c..e193d704 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -8,11 +8,13 @@ program test_linear2d_layer [0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2,& 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2],& [2, 3, 4]) ! first batch are 0.1, second 0.2 + real :: sample_gradient(2, 3, 1) = reshape([2., 2., 2., 2., 2., 2.], [2, 3, 1]) type(linear2d_layer) :: linear linear = linear2d_layer(batch_size=2, sequence_length=3, in_features=4, out_features=1) call test_linear2d_layer_forward(linear, ok, sample_input) + call test_linear2d_layer_backward(linear, ok, sample_input, sample_gradient) contains subroutine test_linear2d_layer_forward(linear, ok, input) @@ -37,4 +39,58 @@ subroutine test_linear2d_layer_forward(linear, ok, input) write(stderr, '(a)') 'forward returned incorrect values.. failed' end if end subroutine test_linear2d_layer_forward + + subroutine test_linear2d_layer_backward(linear, ok, input, gradient) + type(linear2d_layer), intent(in out) :: linear + logical, intent(in out) :: ok + real, intent(in) :: input(2, 3, 4) + real, intent(in) :: gradient(2, 3, 1) + real :: gradient_shape(3) + real :: dw_shape(2) + real :: db_shape(1) + real :: gradient_flat(24) + real :: dw_flat(4) + real :: expected_gradient_shape(3) = [2, 3, 4] + real :: expected_dw_shape(2) = [4, 1] + real :: expected_db_shape(1) = [1] + real :: expected_gradient_flat(24) + real :: expected_dw_flat(4) + real :: expected_db(1) = [12.0] + + expected_gradient_flat = 0.200000003 + expected_dw_flat = 1.80000007 + + call linear % backward(input, gradient) + + gradient_shape = shape(linear % gradient) + if (.not. all(gradient_shape.eq.expected_gradient_shape)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect gradient shape.. failed' + end if + dw_shape = shape(linear % dw) + if (.not. all(dw_shape.eq.expected_dw_shape)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect dw shape.. failed' + end if + db_shape = shape(linear % db) + if (.not. all(db_shape.eq.expected_db_shape)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect db shape.. failed' + end if + + gradient_flat = reshape(linear % gradient, shape(gradient_flat)) + if (.not. all(gradient_flat.eq.expected_gradient_flat)) then + ok = .false. 
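! The expected values above follow from the usual backward rule for a linear map
! y = matmul(x, W) + b, applied per batch slice x_i (3x4) with gradient slice g_i (3x1)
! and W (4x1) initialized to 0.1:
!   input gradient: matmul(g_i, transpose(W)) = 2.0 * 0.1 = 0.2 everywhere
!   dw: sum over batches of matmul(transpose(x_i), g_i) = 0.1*2*3 + 0.2*2*3 = 1.8
!   db: sum of all gradient entries = 2.0 * 6 = 12.0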
+ write(stderr, '(a)') 'backward returned incorrect gradient values.. failed' + end if + dw_flat = reshape(linear % dw, shape(dw_flat)) + if (.not. all(dw_flat.eq.expected_dw_flat)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect dw values.. failed' + end if + if (.not. all(linear % db.eq.expected_db)) then + ok = .false. + write(stderr, '(a)') 'backward returned incorrect db values.. failed' + end if + end subroutine test_linear2d_layer_backward end program test_linear2d_layer \ No newline at end of file From 9919c01f0f85c0d1839f48c4d5b47f31d943f88c Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 2 Feb 2025 15:36:21 +0400 Subject: [PATCH 03/44] introduce concurrency, outtroduce stupidity --- src/nf/nf_linear2d_layer.f90 | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index 833ee17e..e2bd4467 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -64,22 +64,15 @@ end function linear2d_layer_cons module subroutine init(self, input_shape) class(linear2d_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) - integer i, j allocate(self % output(self % batch_size, self % sequence_length, self % out_features)) allocate(self % gradient(self % batch_size, self % sequence_length, self % in_features)) allocate(self%weights(self%in_features, self%out_features)) - do i = 1, self%in_features - do j = 1, self%out_features - self%weights(i, j) = 0.1 - end do - end do + self % weights = 0.1 allocate(self%biases(self%out_features)) - do i = 1, self%out_features - self%biases(i) = 0.11 - end do + self%biases = 0.11 allocate(self % dw(self % in_features, self % out_features)) self % dw = 0.0 @@ -92,11 +85,11 @@ pure module subroutine forward(self, input) real, intent(in) :: input(:, :, :) integer :: i, j - do i = 1, self % batch_size + do concurrent(i = 1: self % batch_size) self % output(i, :, :) = matmul(input(i, :, :), self % weights) - do j = 1, self % sequence_length - self % output(i, j, :) = self % output(i, j, :) + self % biases - end do + end do + do concurrent(i = 1: self % batch_size, j = 1: self % sequence_length) + self % output(i, j, :) = self % output(i, j, :) + self % biases end do end subroutine forward @@ -108,7 +101,7 @@ pure module subroutine backward(self, input, gradient) real :: dw(self % in_features, self % out_features) integer :: i - do i = 1, self % batch_size + do concurrent(i = 1: self % batch_size) self % dw = self % dw + matmul(transpose(input(i, :, :)), gradient(i, :, :)) self % db = self % db + sum(gradient(i, :, :), 1) self % gradient(i, :, :) = matmul(gradient(i, :, :), transpose(self % weights)) From 43d1a1f7933679bba96a63b649f3f81e0dd91011 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 2 Feb 2025 15:37:26 +0400 Subject: [PATCH 04/44] fix style --- src/nf/nf_linear2d_layer.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index e2bd4467..93d52ebc 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -68,10 +68,10 @@ module subroutine init(self, input_shape) allocate(self % output(self % batch_size, self % sequence_length, self % out_features)) allocate(self % gradient(self % batch_size, self % sequence_length, self % in_features)) - allocate(self%weights(self%in_features, self%out_features)) + allocate(self % weights(self % in_features, self % out_features)) self % weights = 0.1 - 
allocate(self%biases(self%out_features)) + allocate(self % biases(self % out_features)) self%biases = 0.11 allocate(self % dw(self % in_features, self % out_features)) From 906f21b43b7dd9ef312a38fc38f67b1d81e99196 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 3 Feb 2025 21:57:16 +0400 Subject: [PATCH 05/44] add parameters api to linear2d_layer --- src/nf/nf_linear2d_layer.f90 | 71 +++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index 93d52ebc..4b1204a4 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -23,6 +23,10 @@ module nf_linear2d_layer procedure :: backward procedure :: forward procedure :: init + procedure :: get_num_params + procedure :: get_params + procedure :: get_gradients + procedure :: set_params end type linear2d_layer @@ -58,7 +62,7 @@ module function linear2d_layer_cons(& res % sequence_length = sequence_length res % batch_size = batch_size - call res % init([1]) +! call res % init([1]) end function linear2d_layer_cons module subroutine init(self, input_shape) @@ -106,6 +110,69 @@ pure module subroutine backward(self, input, gradient) self % db = self % db + sum(gradient(i, :, :), 1) self % gradient(i, :, :) = matmul(gradient(i, :, :), transpose(self % weights)) end do - end subroutine backward + + pure module function get_num_params(self) result(num_params) + class(linear2d_layer), intent(in) :: self + integer :: num_params + + ! Number of weigths times number of biases + num_params = self % in_features * self % out_features + self % out_features + + end function get_num_params + + + module function get_params(self) result(params) + class(linear2d_layer), intent(in), target :: self + real, allocatable :: params(:) + + real, pointer :: w_(:) => null() + + w_(1:size(self % weights)) => self % weights + + params = [ & + w_, & + self % biases & + ] + + end function get_params + + + module function get_gradients(self) result(gradients) + class(linear2d_layer), intent(in), target :: self + real, allocatable :: gradients(:) + + real, pointer :: dw_(:) => null() + + dw_(1:size(self % dw)) => self % dw + + gradients = [ & + dw_, & + self % db & + ] + + end function get_gradients + + + module subroutine set_params(self, params) + class(linear2d_layer), intent(in out) :: self + real, intent(in), target :: params(:) + + real, pointer :: p_(:,:) => null() + + ! check if the number of parameters is correct + if (size(params) /= self % get_num_params()) then + error stop 'Error: number of parameters does not match' + end if + + associate(n => self % in_features * self % out_features) + ! reshape the weights + p_(1:self % in_features, 1:self % out_features) => params(1 : n) + self % weights = p_ + + ! 
reshape the biases + self % biases = params(n + 1 : n + self % out_features) + end associate + + end subroutine set_params end module nf_linear2d_layer From e1b46955df8e0db2791fbbf08ab594f8d955e4d4 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 3 Feb 2025 21:57:55 +0400 Subject: [PATCH 06/44] add constructor for linear2d_layer --- src/nf/nf_layer_constructors.f90 | 7 ++++++- src/nf/nf_layer_constructors_submodule.f90 | 10 ++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index ea1c08df..bc22ed9d 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -8,7 +8,7 @@ module nf_layer_constructors implicit none private - public :: conv2d, dense, flatten, input, maxpool2d, reshape + public :: conv2d, dense, flatten, input, maxpool2d, reshape, linear2d interface input @@ -185,6 +185,11 @@ module function reshape(output_shape) result(res) !! Resulting layer instance end function reshape + module function linear2d(batch_size, sequence_length, in_features, out_features) result(res) + integer, intent(in) :: batch_size, sequence_length, in_features, out_features + type(layer) :: res + end function linear2d + end interface end module nf_layer_constructors diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 4c5994ee..7eebf50c 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -9,6 +9,7 @@ use nf_input3d_layer, only: input3d_layer use nf_maxpool2d_layer, only: maxpool2d_layer use nf_reshape_layer, only: reshape3d_layer + use nf_linear2d_layer, only: linear2d_layer use nf_activation, only: activation_function, relu, sigmoid implicit none @@ -148,4 +149,13 @@ module function reshape(output_shape) result(res) end function reshape + module function linear2d(batch_size, sequence_length, in_features, out_features) result(res) + integer, intent(in) :: batch_size, sequence_length, in_features, out_features + type(layer) :: res + + res % name = 'linear2d' + res % layer_shape = [batch_size, sequence_length, out_features] + allocate(res % p, source=linear2d_layer(batch_size, sequence_length, in_features, out_features)) + end function linear2d + end submodule nf_layer_constructors_submodule From 0fe2ef0801c3d0e4336a67c84e20f97d6d3667f3 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 3 Feb 2025 21:58:32 +0400 Subject: [PATCH 07/44] add integration for linear2d layer --- src/nf/nf_layer_submodule.f90 | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index ab8d5b5d..4c176131 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -9,6 +9,7 @@ use nf_input3d_layer, only: input3d_layer use nf_maxpool2d_layer, only: maxpool2d_layer use nf_reshape_layer, only: reshape3d_layer + use nf_linear2d_layer, only: linear2d_layer use nf_optimizers, only: optimizer_base_type contains @@ -47,6 +48,8 @@ pure module subroutine backward_1d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) type is(maxpool2d_layer) call this_layer % backward(prev_layer % output, gradient) + type is(linear2d_layer) + call this_layer % backward(prev_layer % output, gradient) end select end select @@ -116,6 +119,16 @@ pure module subroutine backward_3d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) end select + type 
is(linear2d_layer) + select type(prev_layer => previous % p) + type is(input3d_layer) + call this_layer % backward(prev_layer % output, gradient) +! type is(dense_layer) +! call this_layer % forward(prev_layer % output) +! type is(flatten_layer) +! call this_layer % backward(prev_layer % output, gradient) + end select + end select end subroutine backward_3d @@ -182,6 +195,8 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) type is(reshape3d_layer) call this_layer % forward(prev_layer % output) + type is(linear2d_layer) + call this_layer % forward(prev_layer % output) end select type is(reshape3d_layer) @@ -196,6 +211,14 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) end select + type is(linear2d_layer) + select type(prev_layer => input % p) + type is(input3d_layer) + call this_layer % forward(prev_layer % output) + type is(linear2d_layer) + call this_layer % forward(prev_layer % output) + end select + end select end subroutine forward @@ -328,6 +351,8 @@ elemental module function get_num_params(self) result(num_params) num_params = 0 type is (reshape3d_layer) num_params = 0 + type is (linear2d_layer) + num_params = this_layer % get_num_params() class default error stop 'Unknown layer type.' end select @@ -355,6 +380,8 @@ module function get_params(self) result(params) ! No parameters to get. type is (reshape3d_layer) ! No parameters to get. + type is (linear2d_layer) + params = this_layer % get_params() class default error stop 'Unknown layer type.' end select @@ -382,6 +409,8 @@ module function get_gradients(self) result(gradients) ! No gradients to get. type is (reshape3d_layer) ! No gradients to get. + type is (linear2d_layer) + gradients = this_layer % get_gradients() class default error stop 'Unknown layer type.' end select @@ -446,6 +475,9 @@ module subroutine set_params(self, params) class default error stop 'Unknown layer type.' + + type is (linear2d_layer) + call this_layer % set_params(params) end select end subroutine set_params From 957095dd6d62252ea1f7066cf29115c27f08be6f Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 3 Feb 2025 21:58:58 +0400 Subject: [PATCH 08/44] set usage rules for linear2d_layer --- src/nf/nf_network_submodule.f90 | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index e90d92d9..57244046 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -8,6 +8,7 @@ use nf_input3d_layer, only: input3d_layer use nf_maxpool2d_layer, only: maxpool2d_layer use nf_reshape_layer, only: reshape3d_layer + use nf_linear2d_layer, only: linear2d_layer use nf_layer, only: layer use nf_layer_constructors, only: conv2d, dense, flatten, input, maxpool2d, reshape use nf_loss, only: quadratic @@ -129,6 +130,11 @@ module subroutine backward(self, output, loss) self % layers(n - 1), & self % loss % derivative(output, this_layer % output) & ) + type is(flatten_layer) + call self % layers(n) % backward( & + self % layers(n - 1), & + self % loss % derivative(output, this_layer % output) & + ) end select else ! 
Hidden layer; take the gradient from the next layer @@ -151,6 +157,8 @@ module subroutine backward(self, output, loss) type is(reshape3d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) + type is(linear2d_layer) + call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) end select end if From eff36fe2b596e828ae1bf425a52bd3c484e7c736 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 3 Feb 2025 21:59:20 +0400 Subject: [PATCH 09/44] add linear2d_layer to public api --- src/nf.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nf.f90 b/src/nf.f90 index 5a5fb3c3..4351e201 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -3,7 +3,7 @@ module nf use nf_datasets_mnist, only: label_digits, load_mnist use nf_layer, only: layer use nf_layer_constructors, only: & - conv2d, dense, flatten, input, maxpool2d, reshape + conv2d, dense, flatten, input, maxpool2d, reshape, linear2d use nf_loss, only: mse, quadratic use nf_metrics, only: corr, maxabs use nf_network, only: network From b6f3c97a6a995a174cc7489d7ccf4895729cc704 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 3 Feb 2025 21:59:39 +0400 Subject: [PATCH 10/44] update tests for linear2d layer --- test/test_linear2d_layer.f90 | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index e193d704..f07fa115 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -12,6 +12,7 @@ program test_linear2d_layer type(linear2d_layer) :: linear linear = linear2d_layer(batch_size=2, sequence_length=3, in_features=4, out_features=1) + call linear % init([4]) call test_linear2d_layer_forward(linear, ok, sample_input) call test_linear2d_layer_backward(linear, ok, sample_input, sample_gradient) From 541d943f7afe2e54cb22d6842295fcd86ee1b9fd Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 3 Feb 2025 22:01:50 +0400 Subject: [PATCH 11/44] remove extra comment --- src/nf/nf_linear2d_layer.f90 | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index 4b1204a4..f2357b53 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -61,8 +61,6 @@ module function linear2d_layer_cons(& res % out_features = out_features res % sequence_length = sequence_length res % batch_size = batch_size - -! call res % init([1]) end function linear2d_layer_cons module subroutine init(self, input_shape) From a27ec090a35df6fbad6cff844c574ce973d62a57 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 3 Feb 2025 22:03:10 +0400 Subject: [PATCH 12/44] remove rubbish --- src/nf/nf_layer_submodule.f90 | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index 4c176131..e9deb956 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -123,10 +123,6 @@ pure module subroutine backward_3d(self, previous, gradient) select type(prev_layer => previous % p) type is(input3d_layer) call this_layer % backward(prev_layer % output, gradient) -! type is(dense_layer) -! call this_layer % forward(prev_layer % output) -! type is(flatten_layer) -! 
call this_layer % backward(prev_layer % output, gradient) end select end select From 79abce38ace6350aa60ea6c68cd5a0c69447b45d Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Tue, 4 Feb 2025 17:47:00 +0400 Subject: [PATCH 13/44] move linear2d layer logic into submodule --- src/nf/nf_linear2d_layer.f90 | 151 +++++-------------------- src/nf/nf_linear2d_layer_submodule.f90 | 127 +++++++++++++++++++++ 2 files changed, 154 insertions(+), 124 deletions(-) create mode 100644 src/nf/nf_linear2d_layer_submodule.f90 diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index f2357b53..6e8d082c 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -31,9 +31,10 @@ module nf_linear2d_layer end type linear2d_layer interface linear2d_layer - module function linear2d_layer_cons(in_features, out_features) & - result(res) - integer, intent(in) :: in_features, out_features + module function linear2d_layer_cons(& + batch_size, sequence_length, in_features, out_features& + ) result(res) + integer, intent(in) :: batch_size, sequence_length, in_features, out_features type(linear2d_layer) :: res end function linear2d_layer_cons end interface linear2d_layer @@ -44,133 +45,35 @@ pure module subroutine forward(self, input) real, intent(in) :: input(:, :, :) end subroutine forward + pure module subroutine backward(self, input, gradient) + class(linear2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :, :) + real, intent(in) :: gradient(:, :, :) + end subroutine backward + module subroutine init(self, input_shape) class(linear2d_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) end subroutine init - end interface - -contains - module function linear2d_layer_cons(& - batch_size, sequence_length, in_features, out_features& - ) result(res) - integer, intent(in) :: batch_size, sequence_length, in_features, out_features - type(linear2d_layer) :: res - - res % in_features = in_features - res % out_features = out_features - res % sequence_length = sequence_length - res % batch_size = batch_size - end function linear2d_layer_cons - - module subroutine init(self, input_shape) - class(linear2d_layer), intent(in out) :: self - integer, intent(in) :: input_shape(:) - - allocate(self % output(self % batch_size, self % sequence_length, self % out_features)) - allocate(self % gradient(self % batch_size, self % sequence_length, self % in_features)) - - allocate(self % weights(self % in_features, self % out_features)) - self % weights = 0.1 - - allocate(self % biases(self % out_features)) - self%biases = 0.11 - - allocate(self % dw(self % in_features, self % out_features)) - self % dw = 0.0 - allocate(self % db(self % out_features)) - self % db = 0.0 - end subroutine init - - pure module subroutine forward(self, input) - class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :, :) - integer :: i, j - - do concurrent(i = 1: self % batch_size) - self % output(i, :, :) = matmul(input(i, :, :), self % weights) - end do - do concurrent(i = 1: self % batch_size, j = 1: self % sequence_length) - self % output(i, j, :) = self % output(i, j, :) + self % biases - end do - end subroutine forward - - pure module subroutine backward(self, input, gradient) - class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :, :) - real, intent(in) :: gradient(:, :, :) - real :: db(self % out_features) - real :: dw(self % in_features, self % out_features) - integer :: i - - do concurrent(i = 1: self % batch_size) - self % dw = self % 
dw + matmul(transpose(input(i, :, :)), gradient(i, :, :)) - self % db = self % db + sum(gradient(i, :, :), 1) - self % gradient(i, :, :) = matmul(gradient(i, :, :), transpose(self % weights)) - end do - end subroutine backward - - pure module function get_num_params(self) result(num_params) - class(linear2d_layer), intent(in) :: self - integer :: num_params - - ! Number of weigths times number of biases - num_params = self % in_features * self % out_features + self % out_features - - end function get_num_params + pure module function get_num_params(self) result(num_params) + class(linear2d_layer), intent(in) :: self + integer :: num_params + end function get_num_params - module function get_params(self) result(params) - class(linear2d_layer), intent(in), target :: self - real, allocatable :: params(:) + module function get_params(self) result(params) + class(linear2d_layer), intent(in), target :: self + real, allocatable :: params(:) + end function get_params - real, pointer :: w_(:) => null() + module function get_gradients(self) result(gradients) + class(linear2d_layer), intent(in), target :: self + real, allocatable :: gradients(:) + end function get_gradients - w_(1:size(self % weights)) => self % weights - - params = [ & - w_, & - self % biases & - ] - - end function get_params - - - module function get_gradients(self) result(gradients) - class(linear2d_layer), intent(in), target :: self - real, allocatable :: gradients(:) - - real, pointer :: dw_(:) => null() - - dw_(1:size(self % dw)) => self % dw - - gradients = [ & - dw_, & - self % db & - ] - - end function get_gradients - - - module subroutine set_params(self, params) - class(linear2d_layer), intent(in out) :: self - real, intent(in), target :: params(:) - - real, pointer :: p_(:,:) => null() - - ! check if the number of parameters is correct - if (size(params) /= self % get_num_params()) then - error stop 'Error: number of parameters does not match' - end if - - associate(n => self % in_features * self % out_features) - ! reshape the weights - p_(1:self % in_features, 1:self % out_features) => params(1 : n) - self % weights = p_ - - ! 
reshape the biases - self % biases = params(n + 1 : n + self % out_features) - end associate - - end subroutine set_params + module subroutine set_params(self, params) + class(linear2d_layer), intent(in out) :: self + real, intent(in), target :: params(:) + end subroutine set_params + end interface end module nf_linear2d_layer diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90 new file mode 100644 index 00000000..7b63277d --- /dev/null +++ b/src/nf/nf_linear2d_layer_submodule.f90 @@ -0,0 +1,127 @@ +submodule(nf_linear2d_layer) nf_linear2d_layer_submodule + use nf_base_layer, only: base_layer + implicit none +contains + module function linear2d_layer_cons(& + batch_size, sequence_length, in_features, out_features& + ) result(res) + integer, intent(in) :: batch_size, sequence_length, in_features, out_features + type(linear2d_layer) :: res + + res % in_features = in_features + res % out_features = out_features + res % sequence_length = sequence_length + res % batch_size = batch_size + end function linear2d_layer_cons + + module subroutine init(self, input_shape) + class(linear2d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + allocate(self % output(self % batch_size, self % sequence_length, self % out_features)) + allocate(self % gradient(self % batch_size, self % sequence_length, self % in_features)) + + allocate(self % weights(self % in_features, self % out_features)) + self % weights = 0.1 + + allocate(self % biases(self % out_features)) + self%biases = 0.11 + + allocate(self % dw(self % in_features, self % out_features)) + self % dw = 0.0 + allocate(self % db(self % out_features)) + self % db = 0.0 + end subroutine init + + pure module subroutine forward(self, input) + class(linear2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :, :) + integer :: i, j + + do concurrent(i = 1: self % batch_size) + self % output(i, :, :) = matmul(input(i, :, :), self % weights) + end do + do concurrent(i = 1: self % batch_size, j = 1: self % sequence_length) + self % output(i, j, :) = self % output(i, j, :) + self % biases + end do + end subroutine forward + + pure module subroutine backward(self, input, gradient) + class(linear2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :, :) + real, intent(in) :: gradient(:, :, :) + real :: db(self % out_features) + real :: dw(self % in_features, self % out_features) + integer :: i + + do concurrent(i = 1: self % batch_size) + self % dw = self % dw + matmul(transpose(input(i, :, :)), gradient(i, :, :)) + self % db = self % db + sum(gradient(i, :, :), 1) + self % gradient(i, :, :) = matmul(gradient(i, :, :), transpose(self % weights)) + end do + end subroutine backward + + pure module function get_num_params(self) result(num_params) + class(linear2d_layer), intent(in) :: self + integer :: num_params + + ! 
Number of weigths times number of biases + num_params = self % in_features * self % out_features + self % out_features + + end function get_num_params + + + module function get_params(self) result(params) + class(linear2d_layer), intent(in), target :: self + real, allocatable :: params(:) + + real, pointer :: w_(:) => null() + + w_(1:size(self % weights)) => self % weights + + params = [ & + w_, & + self % biases & + ] + + end function get_params + + + module function get_gradients(self) result(gradients) + class(linear2d_layer), intent(in), target :: self + real, allocatable :: gradients(:) + + real, pointer :: dw_(:) => null() + + dw_(1:size(self % dw)) => self % dw + + gradients = [ & + dw_, & + self % db & + ] + + end function get_gradients + + + module subroutine set_params(self, params) + class(linear2d_layer), intent(in out) :: self + real, intent(in), target :: params(:) + + real, pointer :: p_(:,:) => null() + + ! check if the number of parameters is correct + if (size(params) /= self % get_num_params()) then + error stop 'Error: number of parameters does not match' + end if + + associate(n => self % in_features * self % out_features) + ! reshape the weights + p_(1:self % in_features, 1:self % out_features) => params(1 : n) + self % weights = p_ + + ! reshape the biases + self % biases = params(n + 1 : n + self % out_features) + end associate + + end subroutine set_params +end submodule nf_linear2d_layer_submodule \ No newline at end of file From 2168ec9a7d14814b55c3b6104067a0f60610b3c3 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Tue, 4 Feb 2025 17:52:23 +0400 Subject: [PATCH 14/44] update cmake for linear2d_layer --- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a0a1be4..fc2ddfcb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,6 +38,8 @@ add_library(neural-fortran src/nf/nf_layer_constructors_submodule.f90 src/nf/nf_layer.f90 src/nf/nf_layer_submodule.f90 + src/nf/nf_linear2d_layer.f90 + src/nf/nf_linear2d_layer_submodule.f90 src/nf/nf_loss.f90 src/nf/nf_loss_submodule.f90 src/nf/nf_maxpool2d_layer.f90 From 9a13af30e75a13b69551e6d29051a14707ebf60d Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Wed, 5 Feb 2025 10:57:46 +0400 Subject: [PATCH 15/44] update tests for linear2d_layer --- test/test_linear2d_layer.f90 | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index f07fa115..1a674d38 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -8,7 +8,7 @@ program test_linear2d_layer [0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2,& 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2],& [2, 3, 4]) ! 
first batch are 0.1, second 0.2 - real :: sample_gradient(2, 3, 1) = reshape([2., 2., 2., 2., 2., 2.], [2, 3, 1]) + real :: sample_gradient(2, 3, 1) = reshape([2., 2., 2., 3., 3., 3.], [2, 3, 1]) type(linear2d_layer) :: linear linear = linear2d_layer(batch_size=2, sequence_length=3, in_features=4, out_features=1) @@ -54,12 +54,18 @@ subroutine test_linear2d_layer_backward(linear, ok, input, gradient) real :: expected_gradient_shape(3) = [2, 3, 4] real :: expected_dw_shape(2) = [4, 1] real :: expected_db_shape(1) = [1] - real :: expected_gradient_flat(24) + real :: expected_gradient_flat(24) = [& + 0.200000003, 0.200000003, 0.200000003, 0.300000012,& + 0.300000012, 0.300000012, 0.200000003, 0.200000003,& + 0.200000003, 0.300000012, 0.300000012, 0.300000012,& + 0.200000003, 0.200000003, 0.200000003, 0.300000012,& + 0.300000012, 0.300000012, 0.200000003, 0.200000003,& + 0.200000003, 0.300000012, 0.300000012, 0.300000012& + ] real :: expected_dw_flat(4) - real :: expected_db(1) = [12.0] + real :: expected_db(1) = [15.0] - expected_gradient_flat = 0.200000003 - expected_dw_flat = 1.80000007 + expected_dw_flat = 2.29999995 call linear % backward(input, gradient) From 0db76db4fa69d640dab603f77ecfa0881cf075df Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Wed, 5 Feb 2025 11:46:40 +0400 Subject: [PATCH 16/44] update linear2d_layer tests --- test/test_linear2d_layer.f90 | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index 1a674d38..34613d61 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -8,7 +8,7 @@ program test_linear2d_layer [0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2,& 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2],& [2, 3, 4]) ! 
first batch are 0.1, second 0.2 - real :: sample_gradient(2, 3, 1) = reshape([2., 2., 2., 3., 3., 3.], [2, 3, 1]) + real :: sample_gradient(2, 3, 1) = reshape([2., 3., 2., 3., 2., 3.], [2, 3, 1]) type(linear2d_layer) :: linear linear = linear2d_layer(batch_size=2, sequence_length=3, in_features=4, out_features=1) @@ -55,17 +55,17 @@ subroutine test_linear2d_layer_backward(linear, ok, input, gradient) real :: expected_dw_shape(2) = [4, 1] real :: expected_db_shape(1) = [1] real :: expected_gradient_flat(24) = [& - 0.200000003, 0.200000003, 0.200000003, 0.300000012,& - 0.300000012, 0.300000012, 0.200000003, 0.200000003,& - 0.200000003, 0.300000012, 0.300000012, 0.300000012,& - 0.200000003, 0.200000003, 0.200000003, 0.300000012,& - 0.300000012, 0.300000012, 0.200000003, 0.200000003,& - 0.200000003, 0.300000012, 0.300000012, 0.300000012& + 0.200000003, 0.300000012, 0.200000003, 0.300000012,& + 0.200000003, 0.300000012, 0.200000003, 0.300000012,& + 0.200000003, 0.300000012, 0.200000003, 0.300000012,& + 0.200000003, 0.300000012, 0.200000003, 0.300000012,& + 0.200000003, 0.300000012, 0.200000003, 0.300000012,& + 0.200000003, 0.300000012, 0.200000003, 0.300000012& ] real :: expected_dw_flat(4) real :: expected_db(1) = [15.0] - expected_dw_flat = 2.29999995 + expected_dw_flat = 2.40000010 call linear % backward(input, gradient) From f28ecc0ffc5fcec01ba2419e738736f9ff0cd5ef Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Wed, 5 Feb 2025 12:00:24 +0400 Subject: [PATCH 17/44] update linear2d_layer tests for batch last --- test/test_linear2d_layer.f90 | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index 34613d61..97bd4d34 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -4,14 +4,14 @@ program test_linear2d_layer implicit none logical :: ok = .true. - real :: sample_input(2, 3, 4) = reshape(& - [0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2,& - 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2, 0.1, 0.2],& - [2, 3, 4]) ! first batch are 0.1, second 0.2 - real :: sample_gradient(2, 3, 1) = reshape([2., 3., 2., 3., 2., 3.], [2, 3, 1]) + real :: sample_input(3, 4, 2) = reshape(& + [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,& + 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],& + [3, 4, 2]) ! 
first batch are 0.1, second 0.2 + real :: sample_gradient(3, 1, 2) = reshape([2., 2., 2., 3., 3., 3.], [3, 1, 2]) type(linear2d_layer) :: linear - linear = linear2d_layer(batch_size=2, sequence_length=3, in_features=4, out_features=1) + linear = linear2d_layer(sequence_length=3, in_features=4, out_features=1, batch_size=2) call linear % init([4]) call test_linear2d_layer_forward(linear, ok, sample_input) @@ -21,11 +21,11 @@ program test_linear2d_layer subroutine test_linear2d_layer_forward(linear, ok, input) type(linear2d_layer), intent(in out) :: linear logical, intent(in out) :: ok - real, intent(in) :: input(2, 3, 4) + real, intent(in) :: input(3, 4, 2) real :: output_shape(3) real :: output_flat(6) - real :: expected_shape(3) = [2, 3, 1] - real :: expected_output_flat(6) = [0.15, 0.19, 0.15, 0.19, 0.15, 0.19] + real :: expected_shape(3) = [3, 1, 2] + real :: expected_output_flat(6) = [0.15, 0.15, 0.15, 0.19, 0.19, 0.19] call linear % forward(input) @@ -44,23 +44,23 @@ end subroutine test_linear2d_layer_forward subroutine test_linear2d_layer_backward(linear, ok, input, gradient) type(linear2d_layer), intent(in out) :: linear logical, intent(in out) :: ok - real, intent(in) :: input(2, 3, 4) - real, intent(in) :: gradient(2, 3, 1) + real, intent(in) :: input(3, 4, 2) + real, intent(in) :: gradient(3, 1, 2) real :: gradient_shape(3) real :: dw_shape(2) real :: db_shape(1) real :: gradient_flat(24) real :: dw_flat(4) - real :: expected_gradient_shape(3) = [2, 3, 4] + real :: expected_gradient_shape(3) = [3, 4, 2] real :: expected_dw_shape(2) = [4, 1] real :: expected_db_shape(1) = [1] real :: expected_gradient_flat(24) = [& - 0.200000003, 0.300000012, 0.200000003, 0.300000012,& - 0.200000003, 0.300000012, 0.200000003, 0.300000012,& - 0.200000003, 0.300000012, 0.200000003, 0.300000012,& - 0.200000003, 0.300000012, 0.200000003, 0.300000012,& - 0.200000003, 0.300000012, 0.200000003, 0.300000012,& - 0.200000003, 0.300000012, 0.200000003, 0.300000012& + 0.200000003, 0.200000003, 0.200000003, 0.200000003,& + 0.200000003, 0.200000003, 0.200000003, 0.200000003,& + 0.200000003, 0.200000003, 0.200000003, 0.200000003,& + 0.300000012, 0.300000012, 0.300000012, 0.300000012,& + 0.300000012, 0.300000012, 0.300000012, 0.300000012,& + 0.300000012, 0.300000012, 0.300000012, 0.300000012& ] real :: expected_dw_flat(4) real :: expected_db(1) = [15.0] From 9386aa35d3df0f65c0d710f8ad330dde4a525bf1 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Wed, 5 Feb 2025 12:01:40 +0400 Subject: [PATCH 18/44] make linear2d_layer with batch as last dimension (performance) --- src/nf/nf_layer_constructors.f90 | 2 +- src/nf/nf_layer_constructors_submodule.f90 | 6 +++--- src/nf/nf_linear2d_layer.f90 | 4 ++-- src/nf/nf_linear2d_layer_submodule.f90 | 16 ++++++++-------- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index bc22ed9d..40480e85 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -185,7 +185,7 @@ module function reshape(output_shape) result(res) !! 
Resulting layer instance end function reshape - module function linear2d(batch_size, sequence_length, in_features, out_features) result(res) + module function linear2d(sequence_length, in_features, out_features, batch_size) result(res) integer, intent(in) :: batch_size, sequence_length, in_features, out_features type(layer) :: res end function linear2d diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 7eebf50c..9bf157fa 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -149,13 +149,13 @@ module function reshape(output_shape) result(res) end function reshape - module function linear2d(batch_size, sequence_length, in_features, out_features) result(res) + module function linear2d(sequence_length, in_features, out_features, batch_size) result(res) integer, intent(in) :: batch_size, sequence_length, in_features, out_features type(layer) :: res res % name = 'linear2d' - res % layer_shape = [batch_size, sequence_length, out_features] - allocate(res % p, source=linear2d_layer(batch_size, sequence_length, in_features, out_features)) + res % layer_shape = [sequence_length, out_features, batch_size] + allocate(res % p, source=linear2d_layer(sequence_length, in_features, out_features, batch_size)) end function linear2d end submodule nf_layer_constructors_submodule diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index 6e8d082c..29069ae6 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -9,7 +9,7 @@ module nf_linear2d_layer public :: linear2d_layer type, extends(base_layer) :: linear2d_layer - integer :: batch_size, sequence_length, in_features, out_features + integer :: sequence_length, in_features, out_features, batch_size real, allocatable :: weights(:, :) real, allocatable :: biases(:) @@ -32,7 +32,7 @@ module nf_linear2d_layer interface linear2d_layer module function linear2d_layer_cons(& - batch_size, sequence_length, in_features, out_features& + sequence_length, in_features, out_features, batch_size& ) result(res) integer, intent(in) :: batch_size, sequence_length, in_features, out_features type(linear2d_layer) :: res diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90 index 7b63277d..a94b18df 100644 --- a/src/nf/nf_linear2d_layer_submodule.f90 +++ b/src/nf/nf_linear2d_layer_submodule.f90 @@ -3,7 +3,7 @@ implicit none contains module function linear2d_layer_cons(& - batch_size, sequence_length, in_features, out_features& + sequence_length, in_features, out_features, batch_size& ) result(res) integer, intent(in) :: batch_size, sequence_length, in_features, out_features type(linear2d_layer) :: res @@ -18,8 +18,8 @@ module subroutine init(self, input_shape) class(linear2d_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) - allocate(self % output(self % batch_size, self % sequence_length, self % out_features)) - allocate(self % gradient(self % batch_size, self % sequence_length, self % in_features)) + allocate(self % output(self % sequence_length, self % out_features, self % batch_size)) + allocate(self % gradient(self % sequence_length, self % in_features, self % batch_size)) allocate(self % weights(self % in_features, self % out_features)) self % weights = 0.1 @@ -39,10 +39,10 @@ pure module subroutine forward(self, input) integer :: i, j do concurrent(i = 1: self % batch_size) - self % output(i, :, :) = matmul(input(i, :, :), self % weights) + self % output(:, :, i) = 
matmul(input(:, :, i), self % weights) end do do concurrent(i = 1: self % batch_size, j = 1: self % sequence_length) - self % output(i, j, :) = self % output(i, j, :) + self % biases + self % output(j, :, i) = self % output(j, :, i) + self % biases end do end subroutine forward @@ -55,9 +55,9 @@ pure module subroutine backward(self, input, gradient) integer :: i do concurrent(i = 1: self % batch_size) - self % dw = self % dw + matmul(transpose(input(i, :, :)), gradient(i, :, :)) - self % db = self % db + sum(gradient(i, :, :), 1) - self % gradient(i, :, :) = matmul(gradient(i, :, :), transpose(self % weights)) + self % dw = self % dw + matmul(transpose(input(:, :, i)), gradient(:, :, i)) + self % db = self % db + sum(gradient(:, :, i), 1) + self % gradient(:, :, i) = matmul(gradient(:, :, i), transpose(self % weights)) end do end subroutine backward From 07750db831290d32c89853f8cfebd27ba1644b5d Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Wed, 12 Feb 2025 23:36:34 +0400 Subject: [PATCH 19/44] linear2d_layer: fix gradient updates --- src/nf/nf_linear2d_layer_submodule.f90 | 4 +- test/test_linear2d_layer.f90 | 67 ++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90 index a94b18df..7e615df5 100644 --- a/src/nf/nf_linear2d_layer_submodule.f90 +++ b/src/nf/nf_linear2d_layer_submodule.f90 @@ -77,7 +77,7 @@ module function get_params(self) result(params) real, pointer :: w_(:) => null() - w_(1:size(self % weights)) => self % weights + w_(1: product(shape(self % weights))) => self % weights params = [ & w_, & @@ -93,7 +93,7 @@ module function get_gradients(self) result(gradients) real, pointer :: dw_(:) => null() - dw_(1:size(self % dw)) => self % dw + dw_(1: product(shape(self % dw))) => self % dw gradients = [ & dw_, & diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index 97bd4d34..27a8f035 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -16,6 +16,7 @@ program test_linear2d_layer call test_linear2d_layer_forward(linear, ok, sample_input) call test_linear2d_layer_backward(linear, ok, sample_input, sample_gradient) + call test_linear2d_layer_gradient_updates(ok) contains subroutine test_linear2d_layer_forward(linear, ok, input) @@ -100,4 +101,70 @@ subroutine test_linear2d_layer_backward(linear, ok, input, gradient) write(stderr, '(a)') 'backward returned incorrect db values.. 
failed' end if end subroutine test_linear2d_layer_backward + + subroutine test_linear2d_layer_gradient_updates(ok) + logical, intent(in out) :: ok + real :: input(3, 4, 1) = reshape([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.11, 0.12], [3, 4, 1]) + real :: gradient(3, 2, 1) = reshape([0.0, 10., 0.2, 3., 0.4, 1.], [3, 2, 1]) + type(linear2d_layer) :: linear + + integer :: num_parameters + real :: parameters(10) + real :: expected_parameters(10) = [& + 0.100000001, 0.100000001, 0.100000001, 0.100000001, 0.100000001, 0.100000001, 0.100000001, 0.100000001,& + 0.109999999, 0.109999999& + ] + real :: gradients(10) + real :: expected_gradients(10) = [& + 1.03999996, 4.09999990, 7.15999985, 1.12400007, 0.240000010, 1.56000006, 2.88000011, 2.86399961,& + 10.1999998, 4.40000010& + ] + real :: updated_parameters(10) + real :: updated_weights(8) + real :: updated_biases(2) + real :: expected_weights(8) = [& + 0.203999996, 0.509999990, 0.816000044, 0.212400019, 0.124000005, 0.256000012, 0.388000011, 0.386399955& + ] + real :: expected_biases(2) = [1.13000000, 0.550000012] + + integer :: i + + linear = linear2d_layer(sequence_length=3, in_features=4, out_features=2, batch_size=1) + call linear % init([4]) + call linear % forward(input) + call linear % backward(input, gradient) + + num_parameters = linear % get_num_params() + if (num_parameters /= 10) then + ok = .false. + write(stderr, '(a)') 'incorrect number of parameters.. failed' + end if + + parameters = linear % get_params() + if (.not. all(parameters.eq.expected_parameters)) then + ok = .false. + write(stderr, '(a)') 'incorrect parameters.. failed' + end if + + gradients = linear % get_gradients() + if (.not. all(gradients.eq.expected_gradients)) then + ok = .false. + write(stderr, '(a)') 'incorrect gradients.. failed' + end if + + do i = 1, num_parameters + updated_parameters(i) = parameters(i) + 0.1 * gradients(i) + end do + call linear % set_params(updated_parameters) + updated_weights = reshape(linear % weights, shape(expected_weights)) + if (.not. all(updated_weights.eq.expected_weights)) then + ok = .false. + write(stderr, '(a)') 'incorrect updated weights.. failed' + end if + updated_biases = linear % biases + if (.not. all(updated_biases.eq.expected_biases)) then + ok = .false. + write(stderr, '(a)') 'incorrect updated biases.. failed' + end if + end subroutine test_linear2d_layer_gradient_updates end program test_linear2d_layer \ No newline at end of file From b5a600a0bb9d8005aecac0fdf31abf548294f843 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Fri, 14 Feb 2025 23:57:16 +0400 Subject: [PATCH 20/44] linear2d_layer: make it 2d --- src/nf/nf_linear2d_layer.f90 | 14 +++++----- test/test_linear2d_layer.f90 | 52 ++++++++++++++++-------------------- 2 files changed, 30 insertions(+), 36 deletions(-) diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index 29069ae6..60c2b0eb 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -13,8 +13,8 @@ module nf_linear2d_layer real, allocatable :: weights(:, :) real, allocatable :: biases(:) - real, allocatable :: output(:, :, :) - real, allocatable :: gradient(:, :, :) ! input gradient + real, allocatable :: output(:, :) + real, allocatable :: gradient(:, :) ! input gradient real, allocatable :: dw(:, :) ! weight gradients real, allocatable :: db(:) ! 
bias gradients @@ -32,9 +32,9 @@ module nf_linear2d_layer interface linear2d_layer module function linear2d_layer_cons(& - sequence_length, in_features, out_features, batch_size& + sequence_length, in_features, out_features& ) result(res) - integer, intent(in) :: batch_size, sequence_length, in_features, out_features + integer, intent(in) :: sequence_length, in_features, out_features type(linear2d_layer) :: res end function linear2d_layer_cons end interface linear2d_layer @@ -42,13 +42,13 @@ end function linear2d_layer_cons interface pure module subroutine forward(self, input) class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :, :) + real, intent(in) :: input(:, :) end subroutine forward pure module subroutine backward(self, input, gradient) class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :, :) - real, intent(in) :: gradient(:, :, :) + real, intent(in) :: input(:, :) + real, intent(in) :: gradient(:, :) end subroutine backward module subroutine init(self, input_shape) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index 27a8f035..7a383e3d 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -4,14 +4,13 @@ program test_linear2d_layer implicit none logical :: ok = .true. - real :: sample_input(3, 4, 2) = reshape(& - [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,& - 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],& - [3, 4, 2]) ! first batch are 0.1, second 0.2 - real :: sample_gradient(3, 1, 2) = reshape([2., 2., 2., 3., 3., 3.], [3, 1, 2]) + real :: sample_input(3, 4) = reshape(& + [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],& + [3, 4]) ! first batch are 0.1, second 0.2 + real :: sample_gradient(3, 1) = reshape([2., 2., 3.], [3, 1]) type(linear2d_layer) :: linear - linear = linear2d_layer(sequence_length=3, in_features=4, out_features=1, batch_size=2) + linear = linear2d_layer(sequence_length=3, in_features=4, out_features=1) call linear % init([4]) call test_linear2d_layer_forward(linear, ok, sample_input) @@ -22,11 +21,11 @@ program test_linear2d_layer subroutine test_linear2d_layer_forward(linear, ok, input) type(linear2d_layer), intent(in out) :: linear logical, intent(in out) :: ok - real, intent(in) :: input(3, 4, 2) - real :: output_shape(3) - real :: output_flat(6) - real :: expected_shape(3) = [3, 1, 2] - real :: expected_output_flat(6) = [0.15, 0.15, 0.15, 0.19, 0.19, 0.19] + real, intent(in) :: input(3, 4) + real :: output_shape(2) + real :: output_flat(3) + real :: expected_shape(2) = [3, 1] + real :: expected_output_flat(3) = [0.17, 0.17, 0.17] call linear % forward(input) @@ -45,28 +44,23 @@ end subroutine test_linear2d_layer_forward subroutine test_linear2d_layer_backward(linear, ok, input, gradient) type(linear2d_layer), intent(in out) :: linear logical, intent(in out) :: ok - real, intent(in) :: input(3, 4, 2) - real, intent(in) :: gradient(3, 1, 2) - real :: gradient_shape(3) + real, intent(in) :: input(3, 4) + real, intent(in) :: gradient(3, 1) + real :: gradient_shape(2) real :: dw_shape(2) real :: db_shape(1) - real :: gradient_flat(24) + real :: gradient_flat(12) real :: dw_flat(4) - real :: expected_gradient_shape(3) = [3, 4, 2] + real :: expected_gradient_shape(2) = [3, 4] real :: expected_dw_shape(2) = [4, 1] real :: expected_db_shape(1) = [1] - real :: expected_gradient_flat(24) = [& - 0.200000003, 0.200000003, 0.200000003, 0.200000003,& - 0.200000003, 0.200000003, 0.200000003, 0.200000003,& - 0.200000003, 
0.200000003, 0.200000003, 0.200000003,& - 0.300000012, 0.300000012, 0.300000012, 0.300000012,& - 0.300000012, 0.300000012, 0.300000012, 0.300000012,& - 0.300000012, 0.300000012, 0.300000012, 0.300000012& + real :: expected_gradient_flat(12) = [& + 0.2, 0.2, 0.3, 0.2,& + 0.2, 0.3, 0.2, 0.2,& + 0.3, 0.2, 0.2, 0.3& ] - real :: expected_dw_flat(4) - real :: expected_db(1) = [15.0] - - expected_dw_flat = 2.40000010 + real :: expected_dw_flat(4) = [0.7, 0.7, 1.4, 1.4] + real :: expected_db(1) = [7] call linear % backward(input, gradient) @@ -104,8 +98,8 @@ end subroutine test_linear2d_layer_backward subroutine test_linear2d_layer_gradient_updates(ok) logical, intent(in out) :: ok - real :: input(3, 4, 1) = reshape([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.11, 0.12], [3, 4, 1]) - real :: gradient(3, 2, 1) = reshape([0.0, 10., 0.2, 3., 0.4, 1.], [3, 2, 1]) + real :: input(3, 4) = reshape([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.11, 0.12], [3, 4]) + real :: gradient(3, 2) = reshape([0.0, 10., 0.2, 3., 0.4, 1.], [3, 2]) type(linear2d_layer) :: linear integer :: num_parameters From dd1297ea40bb115d912142fbcc8e9dc26713828b Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Fri, 14 Feb 2025 23:59:28 +0400 Subject: [PATCH 21/44] linear2d_layer: forgot a file --- src/nf/nf_linear2d_layer_submodule.f90 | 33 +++++++++++--------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90 index 7e615df5..1a513fb8 100644 --- a/src/nf/nf_linear2d_layer_submodule.f90 +++ b/src/nf/nf_linear2d_layer_submodule.f90 @@ -3,23 +3,22 @@ implicit none contains module function linear2d_layer_cons(& - sequence_length, in_features, out_features, batch_size& + sequence_length, in_features, out_features& ) result(res) - integer, intent(in) :: batch_size, sequence_length, in_features, out_features + integer, intent(in) :: sequence_length, in_features, out_features type(linear2d_layer) :: res res % in_features = in_features res % out_features = out_features res % sequence_length = sequence_length - res % batch_size = batch_size end function linear2d_layer_cons module subroutine init(self, input_shape) class(linear2d_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) - allocate(self % output(self % sequence_length, self % out_features, self % batch_size)) - allocate(self % gradient(self % sequence_length, self % in_features, self % batch_size)) + allocate(self % output(self % sequence_length, self % out_features)) + allocate(self % gradient(self % sequence_length, self % in_features)) allocate(self % weights(self % in_features, self % out_features)) self % weights = 0.1 @@ -35,30 +34,26 @@ end subroutine init pure module subroutine forward(self, input) class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :, :) - integer :: i, j + real, intent(in) :: input(:, :) + integer :: i - do concurrent(i = 1: self % batch_size) - self % output(:, :, i) = matmul(input(:, :, i), self % weights) - end do - do concurrent(i = 1: self % batch_size, j = 1: self % sequence_length) - self % output(j, :, i) = self % output(j, :, i) + self % biases + self % output(:, :) = matmul(input(:, :), self % weights) + do concurrent(i = 1: self % sequence_length) + self % output(i, :) = self % output(i, :) + self % biases end do end subroutine forward pure module subroutine backward(self, input, gradient) class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :, :) - real, intent(in) 
:: gradient(:, :, :) + real, intent(in) :: input(:, :) + real, intent(in) :: gradient(:, :) real :: db(self % out_features) real :: dw(self % in_features, self % out_features) integer :: i - do concurrent(i = 1: self % batch_size) - self % dw = self % dw + matmul(transpose(input(:, :, i)), gradient(:, :, i)) - self % db = self % db + sum(gradient(:, :, i), 1) - self % gradient(:, :, i) = matmul(gradient(:, :, i), transpose(self % weights)) - end do + self % dw = self % dw + matmul(transpose(input(:, :)), gradient(:, :)) + self % db = self % db + sum(gradient(:, :), 1) + self % gradient(:, :) = matmul(gradient(:, :), transpose(self % weights)) end subroutine backward pure module function get_num_params(self) result(num_params) From e4cb526295cc3b61b3611ec1f64d13443c774cf9 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sat, 15 Feb 2025 00:01:36 +0400 Subject: [PATCH 22/44] linear2d_layer: temporarily remove api --- src/nf/nf_layer_constructors.f90 | 4 +- src/nf/nf_layer_constructors_submodule.f90 | 8 ++-- src/nf/nf_layer_submodule.f90 | 48 +++++++++++----------- src/nf/nf_network_submodule.f90 | 4 +- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index 40480e85..ea923442 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -185,8 +185,8 @@ module function reshape(output_shape) result(res) !! Resulting layer instance end function reshape - module function linear2d(sequence_length, in_features, out_features, batch_size) result(res) - integer, intent(in) :: batch_size, sequence_length, in_features, out_features + module function linear2d(sequence_length, in_features, out_features) result(res) + integer, intent(in) :: sequence_length, in_features, out_features type(layer) :: res end function linear2d diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 9bf157fa..52a9e5ab 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -149,13 +149,13 @@ module function reshape(output_shape) result(res) end function reshape - module function linear2d(sequence_length, in_features, out_features, batch_size) result(res) - integer, intent(in) :: batch_size, sequence_length, in_features, out_features + module function linear2d(sequence_length, in_features, out_features) result(res) + integer, intent(in) :: sequence_length, in_features, out_features type(layer) :: res res % name = 'linear2d' - res % layer_shape = [sequence_length, out_features, batch_size] - allocate(res % p, source=linear2d_layer(sequence_length, in_features, out_features, batch_size)) + res % layer_shape = [sequence_length, out_features] + allocate(res % p, source=linear2d_layer(sequence_length, in_features, out_features)) end function linear2d end submodule nf_layer_constructors_submodule diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index e9deb956..f46ece84 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -48,8 +48,8 @@ pure module subroutine backward_1d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) type is(maxpool2d_layer) call this_layer % backward(prev_layer % output, gradient) - type is(linear2d_layer) - call this_layer % backward(prev_layer % output, gradient) +! type is(linear2d_layer) +! 
call this_layer % backward(prev_layer % output, gradient) end select end select @@ -119,11 +119,11 @@ pure module subroutine backward_3d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) end select - type is(linear2d_layer) - select type(prev_layer => previous % p) - type is(input3d_layer) - call this_layer % backward(prev_layer % output, gradient) - end select +! type is(linear2d_layer) +! select type(prev_layer => previous % p) +! type is(input3d_layer) +! call this_layer % backward(prev_layer % output, gradient) +! end select end select @@ -191,8 +191,8 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) type is(reshape3d_layer) call this_layer % forward(prev_layer % output) - type is(linear2d_layer) - call this_layer % forward(prev_layer % output) +! type is(linear2d_layer) +! call this_layer % forward(prev_layer % output) end select type is(reshape3d_layer) @@ -207,13 +207,13 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) end select - type is(linear2d_layer) - select type(prev_layer => input % p) - type is(input3d_layer) - call this_layer % forward(prev_layer % output) - type is(linear2d_layer) - call this_layer % forward(prev_layer % output) - end select +! type is(linear2d_layer) +! select type(prev_layer => input % p) +! type is(input3d_layer) +! call this_layer % forward(prev_layer % output) +! type is(linear2d_layer) +! call this_layer % forward(prev_layer % output) +! end select end select @@ -347,8 +347,8 @@ elemental module function get_num_params(self) result(num_params) num_params = 0 type is (reshape3d_layer) num_params = 0 - type is (linear2d_layer) - num_params = this_layer % get_num_params() +! type is (linear2d_layer) +! num_params = this_layer % get_num_params() class default error stop 'Unknown layer type.' end select @@ -376,8 +376,8 @@ module function get_params(self) result(params) ! No parameters to get. type is (reshape3d_layer) ! No parameters to get. - type is (linear2d_layer) - params = this_layer % get_params() +! type is (linear2d_layer) +! params = this_layer % get_params() class default error stop 'Unknown layer type.' end select @@ -405,8 +405,8 @@ module function get_gradients(self) result(gradients) ! No gradients to get. type is (reshape3d_layer) ! No gradients to get. - type is (linear2d_layer) - gradients = this_layer % get_gradients() +! type is (linear2d_layer) +! gradients = this_layer % get_gradients() class default error stop 'Unknown layer type.' end select @@ -472,8 +472,8 @@ module subroutine set_params(self, params) class default error stop 'Unknown layer type.' - type is (linear2d_layer) - call this_layer % set_params(params) +! type is (linear2d_layer) +! call this_layer % set_params(params) end select end subroutine set_params diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 57244046..434ef836 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -157,8 +157,8 @@ module subroutine backward(self, output, loss) type is(reshape3d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) - type is(linear2d_layer) - call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) +! type is(linear2d_layer) +! 
call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) end select end if From 86ec62873e658674416225f7c85dce73b2795c80 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Sat, 15 Feb 2025 22:32:01 -0500 Subject: [PATCH 23/44] Don't expose the concrete layer type via nf --- src/nf.f90 | 1 - 1 file changed, 1 deletion(-) diff --git a/src/nf.f90 b/src/nf.f90 index 4351e201..e9b027c1 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -12,5 +12,4 @@ module nf gaussian, linear, relu, leaky_relu, & sigmoid, softmax, softplus, step, tanhf, & celu - use nf_linear2d_layer, only: linear2d_layer end module nf From d40aebb51d280e034bea857f8c67fd7d43f97dc3 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Sat, 15 Feb 2025 22:32:17 -0500 Subject: [PATCH 24/44] Report success to stdout --- test/test_linear2d_layer.f90 | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index 7a383e3d..b1f39694 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -17,7 +17,15 @@ program test_linear2d_layer call test_linear2d_layer_backward(linear, ok, sample_input, sample_gradient) call test_linear2d_layer_gradient_updates(ok) + if (ok) then + print '(a)', 'test_linear2d_layer: All tests passed.' + else + write(stderr, '(a)') 'test_linear2d_layer: One or more tests failed.' + stop 1 + end if + contains + subroutine test_linear2d_layer_forward(linear, ok, input) type(linear2d_layer), intent(in out) :: linear logical, intent(in out) :: ok @@ -161,4 +169,5 @@ subroutine test_linear2d_layer_gradient_updates(ok) write(stderr, '(a)') 'incorrect updated biases.. failed' end if end subroutine test_linear2d_layer_gradient_updates + end program test_linear2d_layer \ No newline at end of file From b80355374fd1b4d231903795aa991201bc0f3524 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Sat, 15 Feb 2025 22:32:33 -0500 Subject: [PATCH 25/44] Include linear2d test in cmake --- test/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 35954894..12236416 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2,6 +2,7 @@ foreach(execid input1d_layer input2d_layer input3d_layer + linear2d_layer parametric_activation dense_layer conv2d_layer From f1a01a67443722bbe3b3ced4c0d3c3460236b416 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Sat, 15 Feb 2025 22:34:41 -0500 Subject: [PATCH 26/44] Add Linear2d to README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d2cff5b1..ebf7704d 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ Read the paper [here](https://arxiv.org/abs/1902.06714). | Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) | | Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ | | Flatten | `flatten` | `input2d`, `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ | +| Linear (2-d) | `linear2d` | `input2d` | 2 | ✅ | ✅ | | Reshape (1-d to 3-d) | `reshape` | `input1d`, `dense`, `flatten` | 3 | ✅ | ✅ | (*) See Issue [#145](https://github.com/modern-fortran/neural-fortran/issues/145) regarding non-converging CNN training on the MNIST dataset. 
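For reference, the Linear (2-d) row added above implies usage along the following lines. This is a minimal sketch, assuming the layer constructor signature as of this point in the series (sequence_length, in_features, out_features) and the input2d/flatten plumbing that the subsequent patches wire up; the constructor arguments change again later in the series.

    program linear2d_usage_sketch
      ! Hypothetical end-to-end construction of a network containing the new
      ! linear2d layer; names follow the constructors used elsewhere in this
      ! patch series (input, linear2d, flatten, network).
      use nf, only: input, linear2d, flatten, network
      implicit none
      type(network) :: net

      net = network([ &
        input(3, 4), &        ! 2-d input: sequence_length x in_features
        linear2d(3, 4, 1), &  ! project the feature dimension from 4 to 1
        flatten() &           ! collapse back to 1-d for downstream layers
      ])

      call net % print_info()
    end program linear2d_usage_sketch
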
From b39e6dae84319df8d28ec7431c4f6560e3ba689e Mon Sep 17 00:00:00 2001 From: milancurcic Date: Sat, 15 Feb 2025 22:48:40 -0500 Subject: [PATCH 27/44] Plumbing of linear2d with input2d and linear2d --- src/nf/nf_layer_submodule.f90 | 59 +++++++++++++++++++-------------- src/nf/nf_network_submodule.f90 | 4 +-- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index f46ece84..d13362ab 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -64,8 +64,20 @@ pure module subroutine backward_2d(self, previous, gradient) real, intent(in) :: gradient(:,:) ! Backward pass from a 2-d layer downstream currently implemented - ! only for dense and flatten layers - ! CURRENTLY NO LAYERS, tbd: pull/197 and pull/199 + ! only for input2d and linear2d layers + select type(this_layer => self % p) + + type is(linear2d_layer) + + select type(prev_layer => previous % p) + type is(input2d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(linear2d_layer) + call this_layer % backward(prev_layer % output, gradient) + end select + + end select + end subroutine backward_2d @@ -119,12 +131,6 @@ pure module subroutine backward_3d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) end select -! type is(linear2d_layer) -! select type(prev_layer => previous % p) -! type is(input3d_layer) -! call this_layer % backward(prev_layer % output, gradient) -! end select - end select end subroutine backward_3d @@ -207,13 +213,15 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) end select -! type is(linear2d_layer) -! select type(prev_layer => input % p) -! type is(input3d_layer) -! call this_layer % forward(prev_layer % output) -! type is(linear2d_layer) -! call this_layer % forward(prev_layer % output) -! end select + type is(linear2d_layer) + + ! Upstream layers permitted: input2d, linear2d + select type(prev_layer => input % p) + type is(input2d_layer) + call this_layer % forward(prev_layer % output) + type is(linear2d_layer) + call this_layer % forward(prev_layer % output) + end select end select @@ -250,8 +258,10 @@ pure module subroutine get_output_2d(self, output) type is(input2d_layer) allocate(output, source=this_layer % output) + type is(linear2d_layer) + allocate(output, source=this_layer % output) class default - error stop '1-d output can only be read from an input1d, dense, or flatten layer.' + error stop '2-d output can only be read from an input2d or linear2d layer.' end select @@ -347,8 +357,8 @@ elemental module function get_num_params(self) result(num_params) num_params = 0 type is (reshape3d_layer) num_params = 0 -! type is (linear2d_layer) -! num_params = this_layer % get_num_params() + type is (linear2d_layer) + num_params = this_layer % get_num_params() class default error stop 'Unknown layer type.' end select @@ -376,8 +386,8 @@ module function get_params(self) result(params) ! No parameters to get. type is (reshape3d_layer) ! No parameters to get. -! type is (linear2d_layer) -! params = this_layer % get_params() + type is (linear2d_layer) + params = this_layer % get_params() class default error stop 'Unknown layer type.' end select @@ -405,8 +415,8 @@ module function get_gradients(self) result(gradients) ! No gradients to get. type is (reshape3d_layer) ! No gradients to get. -! type is (linear2d_layer) -! 
gradients = this_layer % get_gradients() + type is (linear2d_layer) + gradients = this_layer % get_gradients() class default error stop 'Unknown layer type.' end select @@ -454,6 +464,9 @@ module subroutine set_params(self, params) type is (conv2d_layer) call this_layer % set_params(params) + type is (linear2d_layer) + call this_layer % set_params(params) + type is (maxpool2d_layer) ! No parameters to set. write(stderr, '(a)') 'Warning: calling set_params() ' & @@ -472,8 +485,6 @@ module subroutine set_params(self, params) class default error stop 'Unknown layer type.' -! type is (linear2d_layer) -! call this_layer % set_params(params) end select end subroutine set_params diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 434ef836..57244046 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -157,8 +157,8 @@ module subroutine backward(self, output, loss) type is(reshape3d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) -! type is(linear2d_layer) -! call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) + type is(linear2d_layer) + call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) end select end if From 1bec5319a7c6a2883624e5170e5a897ee8429777 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 16 Feb 2025 16:00:35 +0400 Subject: [PATCH 28/44] linear2d_layer: add flatten2d layer --- src/nf/nf_flatten2d_layer.f90 | 75 ++++++++++++++++++ src/nf/nf_flatten2d_layer_submodule.f90 | 48 ++++++++++++ src/nf/nf_layer_constructors.f90 | 21 ++++- src/nf/nf_layer_constructors_submodule.f90 | 8 ++ test/test_flatten2d_layer.f90 | 89 ++++++++++++++++++++++ 5 files changed, 240 insertions(+), 1 deletion(-) create mode 100644 src/nf/nf_flatten2d_layer.f90 create mode 100644 src/nf/nf_flatten2d_layer_submodule.f90 create mode 100644 test/test_flatten2d_layer.f90 diff --git a/src/nf/nf_flatten2d_layer.f90 b/src/nf/nf_flatten2d_layer.f90 new file mode 100644 index 00000000..e67037f8 --- /dev/null +++ b/src/nf/nf_flatten2d_layer.f90 @@ -0,0 +1,75 @@ +module nf_flatten2d_layer + + !! This module provides the concrete flatten2d layer type. + !! It is used internally by the layer type. + !! It is not intended to be used directly by the user. + + use nf_base_layer, only: base_layer + + implicit none + + private + public :: flatten2d_layer + + type, extends(base_layer) :: flatten2d_layer + + !! Concrete implementation of a flatten2d (2-d to 1-d) layer. + + integer, allocatable :: input_shape(:) + integer :: output_size + + real, allocatable :: gradient(:,:) + real, allocatable :: output(:) + + contains + + procedure :: backward + procedure :: forward + procedure :: init + + end type flatten2d_layer + + interface flatten2d_layer + elemental module function flatten2d_layer_cons() result(res) + !! This function returns the `flatten2d_layer` instance. + type(flatten2d_layer) :: res + !! `flatten2d_layer` instance + end function flatten2d_layer_cons + end interface flatten2d_layer + + interface + + pure module subroutine backward(self, input, gradient) + !! Apply the backward pass to the flatten2d layer. + !! This is a reshape operation from 1-d gradient to 2-d input. + class(flatten2d_layer), intent(in out) :: self + !! flatten2d layer instance + real, intent(in) :: input(:,:) + !! Input from the previous layer + real, intent(in) :: gradient(:) + !! Gradient from the next layer + end subroutine backward + + pure module subroutine forward(self, input) + !! 
Propagate forward the layer. + !! Calling this subroutine updates the values of a few data components + !! of `flatten2d_layer` that are needed for the backward pass. + class(flatten2d_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: input(:,:) + !! Input from the previous layer + end subroutine forward + + module subroutine init(self, input_shape) + !! Initialize the layer data structures. + !! + !! This is a deferred procedure from the `base_layer` abstract type. + class(flatten2d_layer), intent(in out) :: self + !! Dense layer instance + integer, intent(in) :: input_shape(:) + !! Shape of the input layer + end subroutine init + + end interface + +end module nf_flatten2d_layer diff --git a/src/nf/nf_flatten2d_layer_submodule.f90 b/src/nf/nf_flatten2d_layer_submodule.f90 new file mode 100644 index 00000000..875b7374 --- /dev/null +++ b/src/nf/nf_flatten2d_layer_submodule.f90 @@ -0,0 +1,48 @@ +submodule(nf_flatten2d_layer) nf_flatten2d_layer_submodule + + !! This module provides the concrete flatten2d layer type. + !! It is used internally by the layer type. + !! It is not intended to be used directly by the user. + + use nf_base_layer, only: base_layer + + implicit none + +contains + + elemental module function flatten2d_layer_cons() result(res) + type(flatten2d_layer) :: res + end function flatten2d_layer_cons + + + pure module subroutine backward(self, input, gradient) + class(flatten2d_layer), intent(in out) :: self + real, intent(in) :: input(:,:) + real, intent(in) :: gradient(:) + self % gradient = reshape(gradient, shape(input)) + end subroutine backward + + + pure module subroutine forward(self, input) + class(flatten2d_layer), intent(in out) :: self + real, intent(in) :: input(:,:) + self % output = pack(input, .true.) + end subroutine forward + + + module subroutine init(self, input_shape) + class(flatten2d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + self % input_shape = input_shape + self % output_size = product(input_shape) + + allocate(self % gradient(input_shape(1), input_shape(2))) + self % gradient = 0 + + allocate(self % output(self % output_size)) + self % output = 0 + + end subroutine init + +end submodule nf_flatten2d_layer_submodule diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index ea923442..cc7bfe49 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -8,7 +8,7 @@ module nf_layer_constructors implicit none private - public :: conv2d, dense, flatten, input, maxpool2d, reshape, linear2d + public :: conv2d, dense, flatten, flatten2d, input, maxpool2d, reshape, linear2d interface input @@ -125,6 +125,25 @@ module function flatten() result(res) !! Resulting layer instance end function flatten + module function flatten2d() result(res) + !! Flatten (2-d -> 1-d) layer constructor. + !! + !! Use this layer to chain layers with 2-d outputs to layers with 2-d + !! inputs. + !! + !! A flatten layer must not be the first layer in the network. + !! + !! Example: + !! + !! ``` + !! use nf, only :: flatten, layer + !! type(layer) :: flatten_layer + !! flatten_layer = flatten() + !! ``` + type(layer) :: res + !! Resulting layer instance + end function flatten2d + module function conv2d(filters, kernel_size, activation) result(res) !! 2-d convolutional layer constructor. !! 
diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 52a9e5ab..ec42a1a6 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -4,6 +4,7 @@ use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer use nf_flatten_layer, only: flatten_layer + use nf_flatten2d_layer, only: flatten2d_layer use nf_input1d_layer, only: input1d_layer use nf_input2d_layer, only: input2d_layer use nf_input3d_layer, only: input3d_layer @@ -72,6 +73,13 @@ module function flatten() result(res) end function flatten + module function flatten2d() result(res) + type(layer) :: res + res % name = 'flatten2d' + allocate(res % p, source=flatten2d_layer()) + end function flatten2d + + module function input1d(layer_size) result(res) integer, intent(in) :: layer_size type(layer) :: res diff --git a/test/test_flatten2d_layer.f90 b/test/test_flatten2d_layer.f90 new file mode 100644 index 00000000..3189b4e9 --- /dev/null +++ b/test/test_flatten2d_layer.f90 @@ -0,0 +1,89 @@ +program test_flatten2d_layer + + use iso_fortran_env, only: stderr => error_unit + use nf, only: dense, flatten2d, input, layer, network + use nf_flatten2d_layer, only: flatten2d_layer + use nf_input2d_layer, only: input2d_layer + + implicit none + + type(layer) :: test_layer, input_layer + type(network) :: net + real, allocatable :: gradient(:,:) + real, allocatable :: output(:) + logical :: ok = .true. + + test_layer = flatten2d() + + if (.not. test_layer % name == 'flatten2d') then + ok = .false. + write(stderr, '(a)') 'flatten2d layer has its name set correctly.. failed' + end if + + if (test_layer % initialized) then + ok = .false. + write(stderr, '(a)') 'flatten2d layer is not initialized yet.. failed' + end if + + input_layer = input(1, 2) + call test_layer % init(input_layer) + + if (.not. test_layer % initialized) then + ok = .false. + write(stderr, '(a)') 'flatten2d layer is now initialized.. failed' + end if + + if (.not. all(test_layer % layer_shape == [2])) then + ok = .false. + write(stderr, '(a)') 'flatten2d layer has an incorrect output shape.. failed' + end if + + ! Test forward pass - reshaping from 2-d to 1-d + + select type(this_layer => input_layer % p); type is(input2d_layer) + call this_layer % set(reshape(real([1, 2, 3, 4]), [2, 2])) + end select + + call test_layer % forward(input_layer) + call test_layer % get_output(output) + + if (.not. all(output == [1, 2, 3, 4])) then + ok = .false. + write(stderr, '(a)') 'flatten2d layer correctly propagates forward.. failed' + end if + + ! Test backward pass - reshaping from 1-d to 2-d + + ! Calling backward() will set the values on the gradient component + ! input_layer is used only to determine shape + call test_layer % backward(input_layer, real([1, 2, 3, 4])) + + select type(this_layer => test_layer % p); type is(flatten2d_layer) + gradient = this_layer % gradient + end select + + if (.not. all(gradient == reshape(real([1, 2, 3, 4]), [2, 2]))) then + ok = .false. + write(stderr, '(a)') 'flatten2d layer correctly propagates backward.. failed' + end if + + net = network([ & + input(28, 28), & + flatten2d(), & + dense(10) & + ]) + + ! Test that the output layer receives 784 elements in the input + if (.not. all(net % layers(3) % input_layer_shape == [784])) then + ok = .false. + write(stderr, '(a)') 'flatten2d layer correctly chains input2d to dense.. failed' + end if + + if (ok) then + print '(a)', 'test_flatten2d_layer: All tests passed.' 
+ else + write(stderr, '(a)') 'test_flatten2d_layer: One or more tests failed.' + stop 1 + end if + +end program test_flatten2d_layer From d01a174834a4a2686d500b31fcf71e430d93dc90 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 16 Feb 2025 16:02:44 +0400 Subject: [PATCH 29/44] linear2d_layer: make linear2d layer work with input2d and flatten2d --- src/nf.f90 | 2 +- src/nf/nf_layer_submodule.f90 | 42 +++++++++++++++++++++++++++------ src/nf/nf_network_submodule.f90 | 5 +++- 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/src/nf.f90 b/src/nf.f90 index e9b027c1..d215eb85 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -3,7 +3,7 @@ module nf use nf_datasets_mnist, only: label_digits, load_mnist use nf_layer, only: layer use nf_layer_constructors, only: & - conv2d, dense, flatten, input, maxpool2d, reshape, linear2d + conv2d, dense, flatten, flatten2d, input, maxpool2d, reshape, linear2d use nf_loss, only: mse, quadratic use nf_metrics, only: corr, maxabs use nf_network, only: network diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index d13362ab..bb37a965 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -4,6 +4,7 @@ use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer use nf_flatten_layer, only: flatten_layer + use nf_flatten2d_layer, only: flatten2d_layer use nf_input1d_layer, only: input1d_layer use nf_input2d_layer, only: input2d_layer use nf_input3d_layer, only: input3d_layer @@ -48,8 +49,16 @@ pure module subroutine backward_1d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) type is(maxpool2d_layer) call this_layer % backward(prev_layer % output, gradient) -! type is(linear2d_layer) -! call this_layer % backward(prev_layer % output, gradient) + end select + + type is(flatten2d_layer) + + ! Upstream layers permitted: linear2d_layer + select type(prev_layer => previous % p) + type is(linear2d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(input2d_layer) + call this_layer % backward(prev_layer % output, gradient) end select end select @@ -63,8 +72,6 @@ pure module subroutine backward_2d(self, previous, gradient) class(layer), intent(in) :: previous real, intent(in) :: gradient(:,:) - ! Backward pass from a 2-d layer downstream currently implemented - ! only for input2d and linear2d layers select type(this_layer => self % p) type is(linear2d_layer) @@ -197,8 +204,14 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) type is(reshape3d_layer) call this_layer % forward(prev_layer % output) -! type is(linear2d_layer) -! call this_layer % forward(prev_layer % output) + end select + + type is(flatten2d_layer) + select type(prev_layer => input % p) + type is(linear2d_layer) + call this_layer % forward(prev_layer % output) + type is(input2d_layer) + call this_layer % forward(prev_layer % output) end select type is(reshape3d_layer) @@ -241,6 +254,8 @@ pure module subroutine get_output_1d(self, output) allocate(output, source=this_layer % output) type is(flatten_layer) allocate(output, source=this_layer % output) + type is(flatten2d_layer) + allocate(output, source=this_layer % output) class default error stop '1-d output can only be read from an input1d, dense, or flatten layer.' 
@@ -312,9 +327,11 @@ impure elemental module subroutine init(self, input) self % layer_shape = shape(this_layer % output) type is(flatten_layer) self % layer_shape = shape(this_layer % output) + type is(flatten2d_layer) + self % layer_shape = shape(this_layer % output) end select - self % input_layer_shape = input % layer_shape + self % input_layer_shape = input % layer_shape self % initialized = .true. end subroutine init @@ -355,6 +372,8 @@ elemental module function get_num_params(self) result(num_params) num_params = 0 type is (flatten_layer) num_params = 0 + type is (flatten2d_layer) + num_params = 0 type is (reshape3d_layer) num_params = 0 type is (linear2d_layer) @@ -384,6 +403,8 @@ module function get_params(self) result(params) ! No parameters to get. type is (flatten_layer) ! No parameters to get. + type is (flatten2d_layer) + ! No parameters to get. type is (reshape3d_layer) ! No parameters to get. type is (linear2d_layer) @@ -412,6 +433,8 @@ module function get_gradients(self) result(gradients) type is (maxpool2d_layer) ! No gradients to get. type is (flatten_layer) + ! No parameters to get. + type is (flatten2d_layer) ! No gradients to get. type is (reshape3d_layer) ! No gradients to get. @@ -477,6 +500,11 @@ module subroutine set_params(self, params) write(stderr, '(a)') 'Warning: calling set_params() ' & // 'on a zero-parameter layer; nothing to do.' + type is (flatten2d_layer) + ! No parameters to set. + write(stderr, '(a)') 'Warning: calling set_params() ' & + // 'on a zero-parameter layer; nothing to do.' + type is (reshape3d_layer) ! No parameters to set. write(stderr, '(a)') 'Warning: calling set_params() ' & diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 57244046..c2a9c903 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -151,7 +151,6 @@ module subroutine backward(self, output, loss) else call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient_3d) end if - type is(maxpool2d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) @@ -283,6 +282,10 @@ module function predict_2d(self, input) result(res) select type(output_layer => self % layers(num_layers) % p) type is(dense_layer) res = output_layer % output + type is(flatten_layer) + res = output_layer % output + class default + error stop 'network % output not implemented for this output layer' end select end function predict_2d From 141fe57db9227cfe2c2b12cf663011df4d4c2a3f Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 16 Feb 2025 16:05:16 +0400 Subject: [PATCH 30/44] update cmake --- CMakeLists.txt | 2 ++ test/CMakeLists.txt | 1 + 2 files changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index fc2ddfcb..586997fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,8 @@ add_library(neural-fortran src/nf/nf_dense_layer_submodule.f90 src/nf/nf_flatten_layer.f90 src/nf/nf_flatten_layer_submodule.f90 + src/nf/nf_flatten2d_layer.f90 + src/nf/nf_flatten2d_layer_submodule.f90 src/nf/nf_input1d_layer.f90 src/nf/nf_input1d_layer_submodule.f90 src/nf/nf_input2d_layer.f90 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 12236416..b52a3781 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -8,6 +8,7 @@ foreach(execid conv2d_layer maxpool2d_layer flatten_layer + flatten2d_layer insert_flatten reshape_layer dense_network From c4b8fc70a31bdfe102d90c5cd57649a61e07eab6 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 16 Feb 2025 23:06:56 
+0400 Subject: [PATCH 31/44] linear2d_layer: use flatten layer instead of flatten2d --- src/nf/nf_layer_submodule.f90 | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index bb37a965..a5169ea4 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -4,7 +4,6 @@ use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer use nf_flatten_layer, only: flatten_layer - use nf_flatten2d_layer, only: flatten2d_layer use nf_input1d_layer, only: input1d_layer use nf_input2d_layer, only: input2d_layer use nf_input3d_layer, only: input3d_layer @@ -49,16 +48,8 @@ pure module subroutine backward_1d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) type is(maxpool2d_layer) call this_layer % backward(prev_layer % output, gradient) - end select - - type is(flatten2d_layer) - - ! Upstream layers permitted: linear2d_layer - select type(prev_layer => previous % p) type is(linear2d_layer) call this_layer % backward(prev_layer % output, gradient) - type is(input2d_layer) - call this_layer % backward(prev_layer % output, gradient) end select end select @@ -204,14 +195,8 @@ pure module subroutine forward(self, input) call this_layer % forward(prev_layer % output) type is(reshape3d_layer) call this_layer % forward(prev_layer % output) - end select - - type is(flatten2d_layer) - select type(prev_layer => input % p) type is(linear2d_layer) call this_layer % forward(prev_layer % output) - type is(input2d_layer) - call this_layer % forward(prev_layer % output) end select type is(reshape3d_layer) @@ -254,8 +239,6 @@ pure module subroutine get_output_1d(self, output) allocate(output, source=this_layer % output) type is(flatten_layer) allocate(output, source=this_layer % output) - type is(flatten2d_layer) - allocate(output, source=this_layer % output) class default error stop '1-d output can only be read from an input1d, dense, or flatten layer.' @@ -327,8 +310,6 @@ impure elemental module subroutine init(self, input) self % layer_shape = shape(this_layer % output) type is(flatten_layer) self % layer_shape = shape(this_layer % output) - type is(flatten2d_layer) - self % layer_shape = shape(this_layer % output) end select self % input_layer_shape = input % layer_shape @@ -372,8 +353,6 @@ elemental module function get_num_params(self) result(num_params) num_params = 0 type is (flatten_layer) num_params = 0 - type is (flatten2d_layer) - num_params = 0 type is (reshape3d_layer) num_params = 0 type is (linear2d_layer) @@ -403,8 +382,6 @@ module function get_params(self) result(params) ! No parameters to get. type is (flatten_layer) ! No parameters to get. - type is (flatten2d_layer) - ! No parameters to get. type is (reshape3d_layer) ! No parameters to get. type is (linear2d_layer) @@ -434,8 +411,6 @@ module function get_gradients(self) result(gradients) ! No gradients to get. type is (flatten_layer) ! No parameters to get. - type is (flatten2d_layer) - ! No gradients to get. type is (reshape3d_layer) ! No gradients to get. type is (linear2d_layer) @@ -500,11 +475,6 @@ module subroutine set_params(self, params) write(stderr, '(a)') 'Warning: calling set_params() ' & // 'on a zero-parameter layer; nothing to do.' - type is (flatten2d_layer) - ! No parameters to set. - write(stderr, '(a)') 'Warning: calling set_params() ' & - // 'on a zero-parameter layer; nothing to do.' - type is (reshape3d_layer) ! No parameters to set. 
write(stderr, '(a)') 'Warning: calling set_params() ' & From 54d1bb0e42175fb8a6d52251642dfb20153f1a18 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 16 Feb 2025 23:08:55 +0400 Subject: [PATCH 32/44] linear2d_layer: remove flatten2d layer --- src/nf/nf_flatten2d_layer.f90 | 75 ------------------ src/nf/nf_flatten2d_layer_submodule.f90 | 48 ------------ src/nf/nf_layer_constructors.f90 | 21 +---- src/nf/nf_layer_constructors_submodule.f90 | 7 -- test/test_flatten2d_layer.f90 | 89 ---------------------- 5 files changed, 1 insertion(+), 239 deletions(-) delete mode 100644 src/nf/nf_flatten2d_layer.f90 delete mode 100644 src/nf/nf_flatten2d_layer_submodule.f90 delete mode 100644 test/test_flatten2d_layer.f90 diff --git a/src/nf/nf_flatten2d_layer.f90 b/src/nf/nf_flatten2d_layer.f90 deleted file mode 100644 index e67037f8..00000000 --- a/src/nf/nf_flatten2d_layer.f90 +++ /dev/null @@ -1,75 +0,0 @@ -module nf_flatten2d_layer - - !! This module provides the concrete flatten2d layer type. - !! It is used internally by the layer type. - !! It is not intended to be used directly by the user. - - use nf_base_layer, only: base_layer - - implicit none - - private - public :: flatten2d_layer - - type, extends(base_layer) :: flatten2d_layer - - !! Concrete implementation of a flatten2d (2-d to 1-d) layer. - - integer, allocatable :: input_shape(:) - integer :: output_size - - real, allocatable :: gradient(:,:) - real, allocatable :: output(:) - - contains - - procedure :: backward - procedure :: forward - procedure :: init - - end type flatten2d_layer - - interface flatten2d_layer - elemental module function flatten2d_layer_cons() result(res) - !! This function returns the `flatten2d_layer` instance. - type(flatten2d_layer) :: res - !! `flatten2d_layer` instance - end function flatten2d_layer_cons - end interface flatten2d_layer - - interface - - pure module subroutine backward(self, input, gradient) - !! Apply the backward pass to the flatten2d layer. - !! This is a reshape operation from 1-d gradient to 2-d input. - class(flatten2d_layer), intent(in out) :: self - !! flatten2d layer instance - real, intent(in) :: input(:,:) - !! Input from the previous layer - real, intent(in) :: gradient(:) - !! Gradient from the next layer - end subroutine backward - - pure module subroutine forward(self, input) - !! Propagate forward the layer. - !! Calling this subroutine updates the values of a few data components - !! of `flatten2d_layer` that are needed for the backward pass. - class(flatten2d_layer), intent(in out) :: self - !! Dense layer instance - real, intent(in) :: input(:,:) - !! Input from the previous layer - end subroutine forward - - module subroutine init(self, input_shape) - !! Initialize the layer data structures. - !! - !! This is a deferred procedure from the `base_layer` abstract type. - class(flatten2d_layer), intent(in out) :: self - !! Dense layer instance - integer, intent(in) :: input_shape(:) - !! Shape of the input layer - end subroutine init - - end interface - -end module nf_flatten2d_layer diff --git a/src/nf/nf_flatten2d_layer_submodule.f90 b/src/nf/nf_flatten2d_layer_submodule.f90 deleted file mode 100644 index 875b7374..00000000 --- a/src/nf/nf_flatten2d_layer_submodule.f90 +++ /dev/null @@ -1,48 +0,0 @@ -submodule(nf_flatten2d_layer) nf_flatten2d_layer_submodule - - !! This module provides the concrete flatten2d layer type. - !! It is used internally by the layer type. - !! It is not intended to be used directly by the user. 
- - use nf_base_layer, only: base_layer - - implicit none - -contains - - elemental module function flatten2d_layer_cons() result(res) - type(flatten2d_layer) :: res - end function flatten2d_layer_cons - - - pure module subroutine backward(self, input, gradient) - class(flatten2d_layer), intent(in out) :: self - real, intent(in) :: input(:,:) - real, intent(in) :: gradient(:) - self % gradient = reshape(gradient, shape(input)) - end subroutine backward - - - pure module subroutine forward(self, input) - class(flatten2d_layer), intent(in out) :: self - real, intent(in) :: input(:,:) - self % output = pack(input, .true.) - end subroutine forward - - - module subroutine init(self, input_shape) - class(flatten2d_layer), intent(in out) :: self - integer, intent(in) :: input_shape(:) - - self % input_shape = input_shape - self % output_size = product(input_shape) - - allocate(self % gradient(input_shape(1), input_shape(2))) - self % gradient = 0 - - allocate(self % output(self % output_size)) - self % output = 0 - - end subroutine init - -end submodule nf_flatten2d_layer_submodule diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index cc7bfe49..ea923442 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -8,7 +8,7 @@ module nf_layer_constructors implicit none private - public :: conv2d, dense, flatten, flatten2d, input, maxpool2d, reshape, linear2d + public :: conv2d, dense, flatten, input, maxpool2d, reshape, linear2d interface input @@ -125,25 +125,6 @@ module function flatten() result(res) !! Resulting layer instance end function flatten - module function flatten2d() result(res) - !! Flatten (2-d -> 1-d) layer constructor. - !! - !! Use this layer to chain layers with 2-d outputs to layers with 2-d - !! inputs. - !! - !! A flatten layer must not be the first layer in the network. - !! - !! Example: - !! - !! ``` - !! use nf, only :: flatten, layer - !! type(layer) :: flatten_layer - !! flatten_layer = flatten() - !! ``` - type(layer) :: res - !! Resulting layer instance - end function flatten2d - module function conv2d(filters, kernel_size, activation) result(res) !! 2-d convolutional layer constructor. !! 
diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index ec42a1a6..8809db86 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -4,7 +4,6 @@ use nf_conv2d_layer, only: conv2d_layer use nf_dense_layer, only: dense_layer use nf_flatten_layer, only: flatten_layer - use nf_flatten2d_layer, only: flatten2d_layer use nf_input1d_layer, only: input1d_layer use nf_input2d_layer, only: input2d_layer use nf_input3d_layer, only: input3d_layer @@ -73,12 +72,6 @@ module function flatten() result(res) end function flatten - module function flatten2d() result(res) - type(layer) :: res - res % name = 'flatten2d' - allocate(res % p, source=flatten2d_layer()) - end function flatten2d - module function input1d(layer_size) result(res) integer, intent(in) :: layer_size diff --git a/test/test_flatten2d_layer.f90 b/test/test_flatten2d_layer.f90 deleted file mode 100644 index 3189b4e9..00000000 --- a/test/test_flatten2d_layer.f90 +++ /dev/null @@ -1,89 +0,0 @@ -program test_flatten2d_layer - - use iso_fortran_env, only: stderr => error_unit - use nf, only: dense, flatten2d, input, layer, network - use nf_flatten2d_layer, only: flatten2d_layer - use nf_input2d_layer, only: input2d_layer - - implicit none - - type(layer) :: test_layer, input_layer - type(network) :: net - real, allocatable :: gradient(:,:) - real, allocatable :: output(:) - logical :: ok = .true. - - test_layer = flatten2d() - - if (.not. test_layer % name == 'flatten2d') then - ok = .false. - write(stderr, '(a)') 'flatten2d layer has its name set correctly.. failed' - end if - - if (test_layer % initialized) then - ok = .false. - write(stderr, '(a)') 'flatten2d layer is not initialized yet.. failed' - end if - - input_layer = input(1, 2) - call test_layer % init(input_layer) - - if (.not. test_layer % initialized) then - ok = .false. - write(stderr, '(a)') 'flatten2d layer is now initialized.. failed' - end if - - if (.not. all(test_layer % layer_shape == [2])) then - ok = .false. - write(stderr, '(a)') 'flatten2d layer has an incorrect output shape.. failed' - end if - - ! Test forward pass - reshaping from 2-d to 1-d - - select type(this_layer => input_layer % p); type is(input2d_layer) - call this_layer % set(reshape(real([1, 2, 3, 4]), [2, 2])) - end select - - call test_layer % forward(input_layer) - call test_layer % get_output(output) - - if (.not. all(output == [1, 2, 3, 4])) then - ok = .false. - write(stderr, '(a)') 'flatten2d layer correctly propagates forward.. failed' - end if - - ! Test backward pass - reshaping from 1-d to 2-d - - ! Calling backward() will set the values on the gradient component - ! input_layer is used only to determine shape - call test_layer % backward(input_layer, real([1, 2, 3, 4])) - - select type(this_layer => test_layer % p); type is(flatten2d_layer) - gradient = this_layer % gradient - end select - - if (.not. all(gradient == reshape(real([1, 2, 3, 4]), [2, 2]))) then - ok = .false. - write(stderr, '(a)') 'flatten2d layer correctly propagates backward.. failed' - end if - - net = network([ & - input(28, 28), & - flatten2d(), & - dense(10) & - ]) - - ! Test that the output layer receives 784 elements in the input - if (.not. all(net % layers(3) % input_layer_shape == [784])) then - ok = .false. - write(stderr, '(a)') 'flatten2d layer correctly chains input2d to dense.. failed' - end if - - if (ok) then - print '(a)', 'test_flatten2d_layer: All tests passed.' 
- else - write(stderr, '(a)') 'test_flatten2d_layer: One or more tests failed.' - stop 1 - end if - -end program test_flatten2d_layer From 9a4422fba4f4bae57ec83da6dba5ad5b8ea2437f Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 16 Feb 2025 23:14:09 +0400 Subject: [PATCH 33/44] linear2d_layer: remove public api --- src/nf.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nf.f90 b/src/nf.f90 index d215eb85..e9b027c1 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -3,7 +3,7 @@ module nf use nf_datasets_mnist, only: label_digits, load_mnist use nf_layer, only: layer use nf_layer_constructors, only: & - conv2d, dense, flatten, flatten2d, input, maxpool2d, reshape, linear2d + conv2d, dense, flatten, input, maxpool2d, reshape, linear2d use nf_loss, only: mse, quadratic use nf_metrics, only: corr, maxabs use nf_network, only: network From 7606d2c1c01c6293f8b77bf2ba2e398b726cdd48 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 16 Feb 2025 23:31:47 +0400 Subject: [PATCH 34/44] linear2d_layer: update cmakelists --- CMakeLists.txt | 2 -- test/CMakeLists.txt | 1 - 2 files changed, 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 586997fd..fc2ddfcb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,8 +28,6 @@ add_library(neural-fortran src/nf/nf_dense_layer_submodule.f90 src/nf/nf_flatten_layer.f90 src/nf/nf_flatten_layer_submodule.f90 - src/nf/nf_flatten2d_layer.f90 - src/nf/nf_flatten2d_layer_submodule.f90 src/nf/nf_input1d_layer.f90 src/nf/nf_input1d_layer_submodule.f90 src/nf/nf_input2d_layer.f90 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b52a3781..12236416 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -8,7 +8,6 @@ foreach(execid conv2d_layer maxpool2d_layer flatten_layer - flatten2d_layer insert_flatten reshape_layer dense_network From 7d271fe5d54d3c2b73f47f0fa2bac13db7f9e9cc Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 16 Feb 2025 23:32:27 +0400 Subject: [PATCH 35/44] linear2d_layer: workaround cpu imprecision to make ci happy --- test/test_linear2d_layer.f90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index b1f39694..15d7bf2e 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -58,7 +58,7 @@ subroutine test_linear2d_layer_backward(linear, ok, input, gradient) real :: dw_shape(2) real :: db_shape(1) real :: gradient_flat(12) - real :: dw_flat(4) + integer :: dw_flat(4) ! cpu imprecision workaround real :: expected_gradient_shape(2) = [3, 4] real :: expected_dw_shape(2) = [4, 1] real :: expected_db_shape(1) = [1] @@ -67,7 +67,7 @@ subroutine test_linear2d_layer_backward(linear, ok, input, gradient) 0.2, 0.3, 0.2, 0.2,& 0.3, 0.2, 0.2, 0.3& ] - real :: expected_dw_flat(4) = [0.7, 0.7, 1.4, 1.4] + integer :: expected_dw_flat(4) = [7, 7, 14, 14] ! cpu imprecision workaround real :: expected_db(1) = [7] call linear % backward(input, gradient) @@ -93,7 +93,7 @@ subroutine test_linear2d_layer_backward(linear, ok, input, gradient) ok = .false. write(stderr, '(a)') 'backward returned incorrect gradient values.. failed' end if - dw_flat = reshape(linear % dw, shape(dw_flat)) + dw_flat = nint(reshape(linear % dw, shape(dw_flat)) * 10) if (.not. all(dw_flat.eq.expected_dw_flat)) then ok = .false. write(stderr, '(a)') 'backward returned incorrect dw values.. 
failed' From 539fde82de04c8e632e6661019175ce997414a83 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Sun, 16 Feb 2025 22:45:34 -0500 Subject: [PATCH 36/44] Add linear2d example --- example/CMakeLists.txt | 1 + example/linear2d.f90 | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 example/linear2d.f90 diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 28cf71a7..0257dd7d 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -2,6 +2,7 @@ foreach(execid cnn_mnist dense_mnist get_set_network_params + linear2d network_parameters simple sine diff --git a/example/linear2d.f90 b/example/linear2d.f90 new file mode 100644 index 00000000..1b71f5d3 --- /dev/null +++ b/example/linear2d.f90 @@ -0,0 +1,29 @@ +program linear2d_example + + use nf, only: input, network, sgd, linear2d, mse, flatten + implicit none + + type(network) :: net + real :: x(3, 4) = reshape( & + [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2], & + [3, 4]) + real :: y(3) = [0.12, 0.1, 0.3] + integer, parameter :: num_iterations = 500 + integer :: n + + net = network([ & + input(3, 4), & + linear2d(3, 4, 1), & + flatten() & + ]) + + call net % print_info() + + do n = 1, num_iterations + call net % forward(x) + call net % backward(y, mse()) + call net % update(optimizer=sgd(learning_rate=1.)) + print '(i4,3(3x,f8.6))', n, net % predict(x) + end do + +end program linear2d_example \ No newline at end of file From a97f14114178464481fda386bb45c62a7786fe8b Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 17 Feb 2025 12:10:44 +0400 Subject: [PATCH 37/44] linear2d_layer: remove redundant constructor args --- example/linear2d.f90 | 4 ++-- src/nf/nf_layer_constructors.f90 | 4 ++-- src/nf/nf_layer_constructors_submodule.f90 | 6 +++--- src/nf/nf_linear2d_layer.f90 | 6 ++---- src/nf/nf_linear2d_layer_submodule.f90 | 14 ++++++++------ test/test_linear2d_layer.f90 | 10 +++++----- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/example/linear2d.f90 b/example/linear2d.f90 index 1b71f5d3..06c8b255 100644 --- a/example/linear2d.f90 +++ b/example/linear2d.f90 @@ -8,12 +8,12 @@ program linear2d_example [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2], & [3, 4]) real :: y(3) = [0.12, 0.1, 0.3] - integer, parameter :: num_iterations = 500 + integer, parameter :: num_iterations = 5 integer :: n net = network([ & input(3, 4), & - linear2d(3, 4, 1), & + linear2d(3, 1), & flatten() & ]) diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index ea923442..b5ea4dbe 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -185,8 +185,8 @@ module function reshape(output_shape) result(res) !! 
Resulting layer instance end function reshape - module function linear2d(sequence_length, in_features, out_features) result(res) - integer, intent(in) :: sequence_length, in_features, out_features + module function linear2d(sequence_length, out_features) result(res) + integer, intent(in) :: sequence_length, out_features type(layer) :: res end function linear2d diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 8809db86..0724a15e 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -150,13 +150,13 @@ module function reshape(output_shape) result(res) end function reshape - module function linear2d(sequence_length, in_features, out_features) result(res) - integer, intent(in) :: sequence_length, in_features, out_features + module function linear2d(sequence_length, out_features) result(res) + integer, intent(in) :: sequence_length, out_features type(layer) :: res res % name = 'linear2d' res % layer_shape = [sequence_length, out_features] - allocate(res % p, source=linear2d_layer(sequence_length, in_features, out_features)) + allocate(res % p, source=linear2d_layer(out_features)) end function linear2d end submodule nf_layer_constructors_submodule diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index 60c2b0eb..6f9c6a31 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -31,10 +31,8 @@ module nf_linear2d_layer end type linear2d_layer interface linear2d_layer - module function linear2d_layer_cons(& - sequence_length, in_features, out_features& - ) result(res) - integer, intent(in) :: sequence_length, in_features, out_features + module function linear2d_layer_cons(out_features) result(res) + integer, intent(in) :: out_features type(linear2d_layer) :: res end function linear2d_layer_cons end interface linear2d_layer diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90 index 1a513fb8..9df7e58e 100644 --- a/src/nf/nf_linear2d_layer_submodule.f90 +++ b/src/nf/nf_linear2d_layer_submodule.f90 @@ -2,21 +2,23 @@ use nf_base_layer, only: base_layer implicit none contains - module function linear2d_layer_cons(& - sequence_length, in_features, out_features& - ) result(res) - integer, intent(in) :: sequence_length, in_features, out_features + module function linear2d_layer_cons(out_features) result(res) + integer, intent(in) :: out_features type(linear2d_layer) :: res - res % in_features = in_features res % out_features = out_features - res % sequence_length = sequence_length end function linear2d_layer_cons module subroutine init(self, input_shape) class(linear2d_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) + if (size(input_shape) /= 2) then + error stop "Linear2D Layer accepts 2D input" + end if + self % sequence_length = input_shape(1) + self % in_features = input_shape(2) + allocate(self % output(self % sequence_length, self % out_features)) allocate(self % gradient(self % sequence_length, self % in_features)) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index 15d7bf2e..b1f4f55c 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -6,12 +6,12 @@ program test_linear2d_layer logical :: ok = .true. real :: sample_input(3, 4) = reshape(& [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],& - [3, 4]) ! 
first batch are 0.1, second 0.2 + [3, 4]) real :: sample_gradient(3, 1) = reshape([2., 2., 3.], [3, 1]) type(linear2d_layer) :: linear - linear = linear2d_layer(sequence_length=3, in_features=4, out_features=1) - call linear % init([4]) + linear = linear2d_layer(out_features=1) + call linear % init([3, 4]) call test_linear2d_layer_forward(linear, ok, sample_input) call test_linear2d_layer_backward(linear, ok, sample_input, sample_gradient) @@ -131,8 +131,8 @@ subroutine test_linear2d_layer_gradient_updates(ok) integer :: i - linear = linear2d_layer(sequence_length=3, in_features=4, out_features=2, batch_size=1) - call linear % init([4]) + linear = linear2d_layer(out_features=2) + call linear % init([3, 4]) call linear % forward(input) call linear % backward(input, gradient) From bbfaf3cda6b0d65d07612f89c65140f1c2d50a9e Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 17 Feb 2025 13:10:50 +0400 Subject: [PATCH 38/44] linear2d_layer: make example converge --- example/linear2d.f90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/example/linear2d.f90 b/example/linear2d.f90 index 06c8b255..79077723 100644 --- a/example/linear2d.f90 +++ b/example/linear2d.f90 @@ -5,10 +5,10 @@ program linear2d_example type(network) :: net real :: x(3, 4) = reshape( & - [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2], & + [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.11, 0.12, 0.13], & [3, 4]) - real :: y(3) = [0.12, 0.1, 0.3] - integer, parameter :: num_iterations = 5 + real :: y(3) = [0.12, 0.1, 0.2] + integer, parameter :: num_iterations = 9 integer :: n net = network([ & @@ -22,7 +22,7 @@ program linear2d_example do n = 1, num_iterations call net % forward(x) call net % backward(y, mse()) - call net % update(optimizer=sgd(learning_rate=1.)) + call net % update(optimizer=sgd(learning_rate=0.01)) print '(i4,3(3x,f8.6))', n, net % predict(x) end do From 4d28a0a55b12471d2ba8c4087a004741e7399ea9 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 17 Feb 2025 13:47:54 +0400 Subject: [PATCH 39/44] linear2d_layer: make weighs init with normal distribution --- src/nf/nf_linear2d_layer_submodule.f90 | 5 +++-- test/test_linear2d_layer.f90 | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90 index 9df7e58e..effcd43f 100644 --- a/src/nf/nf_linear2d_layer_submodule.f90 +++ b/src/nf/nf_linear2d_layer_submodule.f90 @@ -1,5 +1,6 @@ submodule(nf_linear2d_layer) nf_linear2d_layer_submodule use nf_base_layer, only: base_layer + use nf_random, only: random_normal implicit none contains module function linear2d_layer_cons(out_features) result(res) @@ -23,10 +24,10 @@ module subroutine init(self, input_shape) allocate(self % gradient(self % sequence_length, self % in_features)) allocate(self % weights(self % in_features, self % out_features)) - self % weights = 0.1 + call random_normal(self % weights) allocate(self % biases(self % out_features)) - self%biases = 0.11 + call random_normal(self % biases) allocate(self % dw(self % in_features, self % out_features)) self % dw = 0.0 diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index b1f4f55c..c345fddb 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -12,6 +12,8 @@ program test_linear2d_layer linear = linear2d_layer(out_features=1) call linear % init([3, 4]) + linear % weights = 0.1 + linear % biases = 0.11 call test_linear2d_layer_forward(linear, ok, sample_input) call 
test_linear2d_layer_backward(linear, ok, sample_input, sample_gradient) From bfc69d5d77984ccdcbcd7d9a42582bdc2ee6ab32 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 17 Feb 2025 13:48:36 +0400 Subject: [PATCH 40/44] linear2d_layer: add loss stopping and more iterations --- example/linear2d.f90 | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/example/linear2d.f90 b/example/linear2d.f90 index 79077723..980d45e4 100644 --- a/example/linear2d.f90 +++ b/example/linear2d.f90 @@ -4,11 +4,14 @@ program linear2d_example implicit none type(network) :: net + type(mse) :: loss real :: x(3, 4) = reshape( & [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.11, 0.12, 0.13], & [3, 4]) real :: y(3) = [0.12, 0.1, 0.2] - integer, parameter :: num_iterations = 9 + real :: preds(3) + real :: loss_value + integer, parameter :: num_iterations = 500 integer :: n net = network([ & @@ -18,12 +21,19 @@ program linear2d_example ]) call net % print_info() + loss = mse() do n = 1, num_iterations call net % forward(x) - call net % backward(y, mse()) + call net % backward(y, loss) call net % update(optimizer=sgd(learning_rate=0.01)) - print '(i4,3(3x,f8.6))', n, net % predict(x) + preds = net % predict(x) + print '(i4,3(3x,f8.6))', n, preds + loss_value = loss % eval (y, preds) + if (loss_value < 0.01) then + print *, 'Loss: ', loss_value + return + end if end do end program linear2d_example \ No newline at end of file From 119a6c84b1afc16f89c00d3099471957573ac986 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 17 Feb 2025 14:08:20 +0400 Subject: [PATCH 41/44] linear2d_layer: update tests --- test/test_linear2d_layer.f90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_linear2d_layer.f90 b/test/test_linear2d_layer.f90 index c345fddb..28b99bf0 100644 --- a/test/test_linear2d_layer.f90 +++ b/test/test_linear2d_layer.f90 @@ -135,6 +135,8 @@ subroutine test_linear2d_layer_gradient_updates(ok) linear = linear2d_layer(out_features=2) call linear % init([3, 4]) + linear % weights = 0.1 + linear % biases = 0.11 call linear % forward(input) call linear % backward(input, gradient) From 6f33ebe8a4ba4d68e631667f43be3f62b0511c75 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Mon, 17 Feb 2025 10:56:00 -0500 Subject: [PATCH 42/44] Tidy up --- example/linear2d.f90 | 13 ++++++---- src/nf/nf_linear2d_layer_submodule.f90 | 33 ++++++++++++++++---------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/example/linear2d.f90 b/example/linear2d.f90 index 980d45e4..f06b633c 100644 --- a/example/linear2d.f90 +++ b/example/linear2d.f90 @@ -4,14 +4,14 @@ program linear2d_example implicit none type(network) :: net - type(mse) :: loss + type(mse) :: loss = mse() real :: x(3, 4) = reshape( & [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.11, 0.12, 0.13], & [3, 4]) real :: y(3) = [0.12, 0.1, 0.2] real :: preds(3) real :: loss_value - integer, parameter :: num_iterations = 500 + integer, parameter :: num_iterations = 10000 integer :: n net = network([ & @@ -21,19 +21,22 @@ program linear2d_example ]) call net % print_info() - loss = mse() do n = 1, num_iterations + call net % forward(x) call net % backward(y, loss) call net % update(optimizer=sgd(learning_rate=0.01)) + preds = net % predict(x) - print '(i4,3(3x,f8.6))', n, preds + print '(i5,3(3x,f9.6))', n, preds + loss_value = loss % eval (y, preds) - if (loss_value < 0.01) then + if (loss_value < 1e-4) then print *, 'Loss: ', loss_value return end if + end do end program linear2d_example \ No newline at end of file 
diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90 index effcd43f..eaa93411 100644 --- a/src/nf/nf_linear2d_layer_submodule.f90 +++ b/src/nf/nf_linear2d_layer_submodule.f90 @@ -2,7 +2,9 @@ use nf_base_layer, only: base_layer use nf_random, only: random_normal implicit none + contains + module function linear2d_layer_cons(out_features) result(res) integer, intent(in) :: out_features type(linear2d_layer) :: res @@ -10,12 +12,13 @@ module function linear2d_layer_cons(out_features) result(res) res % out_features = out_features end function linear2d_layer_cons + module subroutine init(self, input_shape) class(linear2d_layer), intent(in out) :: self integer, intent(in) :: input_shape(:) if (size(input_shape) /= 2) then - error stop "Linear2D Layer accepts 2D input" + error stop "linear2d layer requires 2D input." end if self % sequence_length = input_shape(1) self % in_features = input_shape(2) @@ -30,40 +33,45 @@ module subroutine init(self, input_shape) call random_normal(self % biases) allocate(self % dw(self % in_features, self % out_features)) - self % dw = 0.0 + self % dw = 0 allocate(self % db(self % out_features)) - self % db = 0.0 + self % db = 0 + end subroutine init + pure module subroutine forward(self, input) class(linear2d_layer), intent(in out) :: self real, intent(in) :: input(:, :) integer :: i - self % output(:, :) = matmul(input(:, :), self % weights) - do concurrent(i = 1: self % sequence_length) - self % output(i, :) = self % output(i, :) + self % biases + self % output(:,:) = matmul(input(:,:), self % weights) + do concurrent(i = 1:self % sequence_length) + self % output(i,:) = self % output(i,:) + self % biases end do + end subroutine forward + pure module subroutine backward(self, input, gradient) class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :) - real, intent(in) :: gradient(:, :) + real, intent(in) :: input(:,:) + real, intent(in) :: gradient(:,:) real :: db(self % out_features) real :: dw(self % in_features, self % out_features) integer :: i - self % dw = self % dw + matmul(transpose(input(:, :)), gradient(:, :)) - self % db = self % db + sum(gradient(:, :), 1) - self % gradient(:, :) = matmul(gradient(:, :), transpose(self % weights)) + self % dw = self % dw + matmul(transpose(input(:,:)), gradient(:,:)) + self % db = self % db + sum(gradient(:,:), 1) + self % gradient(:,:) = matmul(gradient(:,:), transpose(self % weights)) end subroutine backward + pure module function get_num_params(self) result(num_params) class(linear2d_layer), intent(in) :: self integer :: num_params - ! Number of weigths times number of biases + ! 
Number of weights times number of biases num_params = self % in_features * self % out_features + self % out_features end function get_num_params @@ -122,4 +130,5 @@ module subroutine set_params(self, params) end associate end subroutine set_params + end submodule nf_linear2d_layer_submodule \ No newline at end of file From 678b2c09e795ae71740e005aa97d60110939405d Mon Sep 17 00:00:00 2001 From: milancurcic Date: Mon, 17 Feb 2025 11:19:06 -0500 Subject: [PATCH 43/44] Require passing only out_features to linear2d(); tidy up --- example/linear2d.f90 | 2 +- src/nf/nf_layer_constructors.f90 | 9 +++++++-- src/nf/nf_layer_constructors_submodule.f90 | 7 ++++--- src/nf/nf_layer_submodule.f90 | 4 +++- src/nf/nf_linear2d_layer.f90 | 14 +++++++------- src/nf/nf_linear2d_layer_submodule.f90 | 2 ++ 6 files changed, 24 insertions(+), 14 deletions(-) diff --git a/example/linear2d.f90 b/example/linear2d.f90 index f06b633c..5a31fcf3 100644 --- a/example/linear2d.f90 +++ b/example/linear2d.f90 @@ -16,7 +16,7 @@ program linear2d_example net = network([ & input(3, 4), & - linear2d(3, 1), & + linear2d(1), & flatten() & ]) diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index b5ea4dbe..2983ddcd 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -185,9 +185,14 @@ module function reshape(output_shape) result(res) !! Resulting layer instance end function reshape - module function linear2d(sequence_length, out_features) result(res) - integer, intent(in) :: sequence_length, out_features + module function linear2d(out_features) result(res) + !! Rank-2 (sequence_length, out_features) linear layer constructor. + !! sequence_length is determined at layer initialization, based on the + !! output shape of the previous layer. + integer, intent(in) :: out_features + !! Number of output features type(layer) :: res + !! Resulting layer instance end function linear2d end interface diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 0724a15e..ae7d05dc 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -150,13 +150,14 @@ module function reshape(output_shape) result(res) end function reshape - module function linear2d(sequence_length, out_features) result(res) - integer, intent(in) :: sequence_length, out_features + + module function linear2d(out_features) result(res) + integer, intent(in) :: out_features type(layer) :: res res % name = 'linear2d' - res % layer_shape = [sequence_length, out_features] allocate(res % p, source=linear2d_layer(out_features)) + end function linear2d end submodule nf_layer_constructors_submodule diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index a5169ea4..22eabe9e 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -301,7 +301,7 @@ impure elemental module subroutine init(self, input) call this_layer % init(input % layer_shape) end select - ! The shape of conv2d, maxpool2d, or flatten layers is not known + ! The shape of linear2d, conv2d, maxpool2d, or flatten layers is not known ! until we receive an input layer. 
select type(this_layer => self % p) type is(conv2d_layer) @@ -310,6 +310,8 @@ impure elemental module subroutine init(self, input) self % layer_shape = shape(this_layer % output) type is(flatten_layer) self % layer_shape = shape(this_layer % output) + type is(linear2d_layer) + self % layer_shape = shape(this_layer % output) end select self % input_layer_shape = input % layer_shape diff --git a/src/nf/nf_linear2d_layer.f90 b/src/nf/nf_linear2d_layer.f90 index 6f9c6a31..f785a14c 100644 --- a/src/nf/nf_linear2d_layer.f90 +++ b/src/nf/nf_linear2d_layer.f90 @@ -11,11 +11,11 @@ module nf_linear2d_layer type, extends(base_layer) :: linear2d_layer integer :: sequence_length, in_features, out_features, batch_size - real, allocatable :: weights(:, :) + real, allocatable :: weights(:,:) real, allocatable :: biases(:) - real, allocatable :: output(:, :) - real, allocatable :: gradient(:, :) ! input gradient - real, allocatable :: dw(:, :) ! weight gradients + real, allocatable :: output(:,:) + real, allocatable :: gradient(:,:) ! input gradient + real, allocatable :: dw(:,:) ! weight gradients real, allocatable :: db(:) ! bias gradients contains @@ -40,13 +40,13 @@ end function linear2d_layer_cons interface pure module subroutine forward(self, input) class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :) + real, intent(in) :: input(:,:) end subroutine forward pure module subroutine backward(self, input, gradient) class(linear2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :) - real, intent(in) :: gradient(:, :) + real, intent(in) :: input(:,:) + real, intent(in) :: gradient(:,:) end subroutine backward module subroutine init(self, input_shape) diff --git a/src/nf/nf_linear2d_layer_submodule.f90 b/src/nf/nf_linear2d_layer_submodule.f90 index eaa93411..0dfe7e27 100644 --- a/src/nf/nf_linear2d_layer_submodule.f90 +++ b/src/nf/nf_linear2d_layer_submodule.f90 @@ -10,6 +10,7 @@ module function linear2d_layer_cons(out_features) result(res) type(linear2d_layer) :: res res % out_features = out_features + end function linear2d_layer_cons @@ -34,6 +35,7 @@ module subroutine init(self, input_shape) allocate(self % dw(self % in_features, self % out_features)) self % dw = 0 + allocate(self % db(self % out_features)) self % db = 0 From e78ef62b3fb83201828671af00afed808be23f33 Mon Sep 17 00:00:00 2001 From: milancurcic Date: Mon, 17 Feb 2025 13:30:23 -0500 Subject: [PATCH 44/44] Remove linear2d example --- example/CMakeLists.txt | 1 - example/linear2d.f90 | 42 ------------------------------------------ 2 files changed, 43 deletions(-) delete mode 100644 example/linear2d.f90 diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 0257dd7d..28cf71a7 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -2,7 +2,6 @@ foreach(execid cnn_mnist dense_mnist get_set_network_params - linear2d network_parameters simple sine diff --git a/example/linear2d.f90 b/example/linear2d.f90 deleted file mode 100644 index 5a31fcf3..00000000 --- a/example/linear2d.f90 +++ /dev/null @@ -1,42 +0,0 @@ -program linear2d_example - - use nf, only: input, network, sgd, linear2d, mse, flatten - implicit none - - type(network) :: net - type(mse) :: loss = mse() - real :: x(3, 4) = reshape( & - [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.11, 0.12, 0.13], & - [3, 4]) - real :: y(3) = [0.12, 0.1, 0.2] - real :: preds(3) - real :: loss_value - integer, parameter :: num_iterations = 10000 - integer :: n - - net = network([ & - input(3, 4), & - linear2d(1), & - flatten() & - ]) - - 
call net % print_info() - - do n = 1, num_iterations - - call net % forward(x) - call net % backward(y, loss) - call net % update(optimizer=sgd(learning_rate=0.01)) - - preds = net % predict(x) - print '(i5,3(3x,f9.6))', n, preds - - loss_value = loss % eval (y, preds) - if (loss_value < 1e-4) then - print *, 'Loss: ', loss_value - return - end if - - end do - -end program linear2d_example \ No newline at end of file
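
Usage note (not part of the patch series): PATCH 44 removes example/linear2d.f90, so the applied series ships no demo of the finished layer. The sketch below is a minimal reconstruction adapted from that deleted example and from the out_features-only linear2d() constructor introduced in PATCH 43; the program name, the fixed 1000-iteration cap, and the exit-based stopping are illustrative assumptions, not code carried by any patch.

program linear2d_usage_sketch
  ! Minimal training-loop sketch mirroring the removed example/linear2d.f90.
  use nf, only: input, network, sgd, linear2d, mse, flatten
  implicit none

  type(network) :: net
  type(mse) :: loss = mse()
  ! Sequence length 3 with 4 input features, same toy data as the deleted example.
  real :: x(3, 4) = reshape( &
    [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.11, 0.12, 0.13], &
    [3, 4])
  real :: y(3) = [0.12, 0.1, 0.2]
  real :: preds(3)
  integer :: n

  ! linear2d now takes only out_features; sequence_length (3) and
  ! in_features (4) are inferred from the preceding input(3, 4) layer.
  net = network([ &
    input(3, 4), &
    linear2d(1), &
    flatten() &
  ])

  call net % print_info()

  do n = 1, 1000
    call net % forward(x)
    call net % backward(y, loss)
    call net % update(optimizer=sgd(learning_rate=0.01))
    preds = net % predict(x)
    ! Stop as soon as the fit is good enough (illustrative threshold).
    if (loss % eval(y, preds) < 1e-4) exit
  end do

  print '(i5,3(3x,f9.6))', n, preds

end program linear2d_usage_sketch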