
Commit 2926dc1

xor test - intermediate state
1 parent a800c8f commit 2926dc1

11 files changed: +123 -24 lines

Diff for: .gitignore

+1
@@ -1,3 +1,4 @@
 .vscode/
 .vs/
 build/
+CMakeSettings.json

Diff for: include/data/dataset.hpp

+6-6
@@ -9,32 +9,32 @@ namespace Tipousi
         class Dataset
         {
           public:
-            Dataset(const Eigen::MatrixXd &X, const Eigen::MatrixXd &Y);
+            Dataset(const Eigen::MatrixXf &X, const Eigen::MatrixXf &Y);
             ~Dataset() = default;
 
             using DataPair = std::pair<Eigen::MatrixXf, Eigen::MatrixXf>;
 
             class Iterator
             {
               public:
-                Iterator(const Eigen::MatrixXd &X, const Eigen::MatrixXd &Y,
+                Iterator(const Eigen::MatrixXf &X, const Eigen::MatrixXf &Y,
                          size_t index);
                 Iterator &operator++();
                 bool operator!=(const Iterator &other) const;
                 DataPair operator*() const;
 
              private:
-                const Eigen::MatrixXd &m_X;
-                const Eigen::MatrixXd &m_y;
+                const Eigen::MatrixXf &m_X;
+                const Eigen::MatrixXf &m_y;
                 size_t m_index;
             };
 
             Iterator begin() const;
             Iterator end() const;
 
           private:
-            Eigen::MatrixXd m_X;
-            Eigen::MatrixXd m_y;
+            Eigen::MatrixXf m_X;
+            Eigen::MatrixXf m_y;
         };
     }; // namespace Data
 }; // namespace Tipousi
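This change standardizes the dataset on single-precision MatrixXf, matching the MatrixXf used throughout the graph, loss, and test code. Since begin()/end() and operator* yield one DataPair per index, a Dataset can be walked with a range-based for loop; a minimal usage sketch, assuming the iterator yields one sample (one row of X and Y) per step, which the header alone does not pin down:

    Eigen::MatrixXf X(4, 2), Y(4, 1);
    X << 0, 0, 0, 1, 1, 0, 1, 1;
    Y << 0, 1, 1, 0;
    Tipousi::Data::Dataset dataset(X, Y);
    for (const auto &[x_sample, y_sample] : dataset)
    {
        // each element is assumed to be an Eigen::MatrixXf holding one sample
        std::cout << x_sample << " -> " << y_sample << std::endl;
    }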

Diff for: include/graph/node.hpp

+1-1
@@ -19,7 +19,7 @@ namespace Tipousi
         }
 
         void forward(Eigen::MatrixXf &data);
-        void backward(const Eigen::MatrixXf &dout, Eigen::MatrixXf &ddout);
+        void backward(Eigen::MatrixXf &grads);
 
         void add_input(Node *node);
         void add_output(Node *node);

Diff for: include/graph/sequential.hpp

+1-1
@@ -26,7 +26,7 @@ namespace Tipousi
         }
 
         void forward(const Eigen::MatrixXf &in, Eigen::MatrixXf &out);
-        void backward();
+        void backward(Eigen::MatrixXf &initial_grads);
 
         void train(const Data::Dataset &dataset,
                    const Optimizer::OptimizerBase &optimizer,

Diff for: include/loss/mse.hpp

+1
@@ -7,6 +7,7 @@ namespace Tipousi
     {
         class MSE : public LossBase
         {
+          public:
             MSE() = default;
             ~MSE() = default;
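The added public: specifier is the actual fix here: class members default to private in C++, so without it the defaulted constructor and destructor were inaccessible, and a plain declaration like MSE mse; (used in the new XOR test below) would not compile.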

Diff for: src/data/dataset.cpp

+3-3
@@ -4,13 +4,13 @@ namespace Tipousi
 {
     namespace Data
     {
-        Dataset::Dataset(const Eigen::MatrixXd &X, const Eigen::MatrixXd &Y)
+        Dataset::Dataset(const Eigen::MatrixXf &X, const Eigen::MatrixXf &Y)
             : m_X(X), m_y(Y)
         {
         }
 
-        Dataset::Iterator::Iterator(const Eigen::MatrixXd &X,
-                                    const Eigen::MatrixXd &Y, size_t index)
+        Dataset::Iterator::Iterator(const Eigen::MatrixXf &X,
+                                    const Eigen::MatrixXf &Y, size_t index)
             : m_X(X), m_y(Y), m_index(index)
         {
         }

Diff for: src/graph/node.cpp

+5-5
@@ -20,13 +20,13 @@ namespace Tipousi
 
         // if we have multiple inputs then save forward computations by
         // caching the current data. Experimental!!!
-        if (m_outputs.size() > 1)
-        {
+        // if (m_outputs.size() > 1)
+        // {
             // m_cache
-        }
+        // }
     }
 
-    void Node::backward(const Eigen::MatrixXf &dout, Eigen::MatrixXf &ddout)
+    void Node::backward(Eigen::MatrixXf &grads)
     {
         // std::vector<float> grad_input =
         //     m_operation->backward(grad_output); for (auto *input_node :
@@ -36,7 +36,7 @@ namespace Tipousi
         // }
 
         // TODO this is a fake call that is used only if the node has one Op
-        m_operation->backward(dout, ddout);
+        m_operation->backward(grads, grads);
     }
 
     void Node::add_input(Node *node)
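Note that m_operation->backward(grads, grads) passes the same matrix as both the upstream gradient and the result, so each operation now updates the gradient in place. With Eigen this is only safe for coefficient-wise backward rules, where each output coefficient depends solely on the corresponding input coefficient. A minimal sketch of an aliasing-safe backward for a ReLU-like op, using the two-argument signature implied by the call above (m_input_cache is a hypothetical member holding the input saved during forward):

    void ReLU::backward(const Eigen::MatrixXf &dout, Eigen::MatrixXf &ddout)
    {
        // coefficient-wise product, so safe even when &dout == &ddout
        ddout = dout.cwiseProduct(
            (m_input_cache.array() > 0.0f).cast<float>().matrix());
    }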

Diff for: src/graph/sequential.cpp

+22-3
@@ -32,6 +32,8 @@ namespace Tipousi
         void Sequential::forward(const Eigen::MatrixXf &in,
                                  Eigen::MatrixXf &out)
         {
+            // in should be const?
+            //
             // copy to create the object that will be passed through the network
             // while keeping the original data intact
             Eigen::MatrixXf data_copy = in;
@@ -54,9 +56,26 @@ namespace Tipousi
             out = data_copy; // copying happening?
         }
 
-        void Sequential::backward()
+        void Sequential::backward(Eigen::MatrixXf &initial_grads)
         {
-            //
+            // think if copying is really needed here
+            Eigen::MatrixXf grad_copy = initial_grads;
+            Node *current_node = m_output_node;
+            // continue until no more nodes
+            while (true)
+            {
+                if (current_node)
+                {
+                    // TODO hacky approach: always take number 0
+                    auto &input_nodes = current_node->get_inputs();
+                    if (input_nodes.size() == 0 || !input_nodes[0])
+                    {
+                        break;
+                    }
+                    current_node->backward(grad_copy);
+                    current_node = input_nodes[0];
+                }
+            }
         }
 
         void Sequential::train(const Data::Dataset &dataset,
@@ -75,7 +94,7 @@ namespace Tipousi
                 forward(x, output);
                 total_loss += loss_func.compute(y, output);
                 loss_func.grad(out_grad, y, output);
-                backward();
+                backward(out_grad);
                 counter++;
             }
             std::cout << "Epoch: " << i
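The new backward is a reverse-mode pass over a pure chain: starting from m_output_node, it applies each node's backward to the same gradient buffer and steps to input 0, stopping at the node with no predecessor (which is therefore never differentiated itself). As written, while (true) would spin forever if current_node started out null; a sketch of an equivalent loop with that guard folded into the condition, assuming get_inputs() returns the node's predecessor pointers as the diff implies:

    for (Node *node = m_output_node; node != nullptr;)
    {
        auto &inputs = node->get_inputs();
        if (inputs.empty() || !inputs[0])
        {
            break; // reached the input node
        }
        node->backward(grad_copy); // updates grad_copy in place
        node = inputs[0];
    }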

Diff for: tests/test_simple_net.cpp

+5-3
@@ -32,11 +32,11 @@ TEST(SimpleNetTest, SimpleCreation)
     node4->add_input(node3); // node4 depends on node3
 
     // create the graph (pass input and output nodes)
-    float learning_rate{0.001};
+    float learning_rate{0.001f};
     Sequential net(node1, node4, learning_rate);
 
     // test inference
-    int n_samples{32};
+    int n_samples{10};
     auto features = Eigen::MatrixXf::Random(n_samples, n_features);
     auto labels = Eigen::MatrixXf(n_samples, n_labels);
 
@@ -53,5 +53,7 @@ TEST(SimpleNetTest, SimpleCreation)
               << std::endl;
 
     // backward pass
-    EXPECT_NO_THROW(net.backward());
+    // todo: add real values here, can't do backward pass with empty matrix
+    // Eigen::MatrixXf out_grad;
+    // EXPECT_NO_THROW(net.backward(out_grad));
 }

Diff for: tests/test_softmax.cpp

+2-2
@@ -17,7 +17,7 @@ TEST(SoftmaxLayerTest, ForwardPass)
     Eigen::MatrixXf expected_output(2, 2);
     expected_output << 0.268941f, 0.731059f, 0.268941f, 0.731059f;
 
-    expectEigenMatrixNear(output, expected_output, 1e-5);
+    expectEigenMatrixNear(output, expected_output, 1e-5f);
 }
 
 TEST(SoftmaxLayerTest, BackwardPass)
@@ -39,5 +39,5 @@ TEST(SoftmaxLayerTest, BackwardPass)
     Eigen::MatrixXf expected_in_grad(2, 2);
     expected_in_grad << 0.0393f, -0.0393f, 0.0393f, -0.0393f;
 
-    expectEigenMatrixNear(in_grad, expected_in_grad, 1e-3);
+    expectEigenMatrixNear(in_grad, expected_in_grad, 1e-3f);
 }
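The f suffixes make the tolerances float literals; if expectEigenMatrixNear takes its tolerance parameter as float (which this change suggests), that avoids an implicit double-to-float narrowing at the call site.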

Diff for: tests/test_xor_inference.cpp

+76
@@ -0,0 +1,76 @@
+#include "activation/relu.hpp"
+#include "activation/softmax.hpp"
+#include "data/dataset.hpp"
+#include "graph/node.hpp"
+#include "graph/sequential.hpp"
+#include "layer/dense.hpp"
+#include "loss/mse.hpp"
+#include "optimizer/sgd.hpp"
+#include <chrono>
+#include <gtest/gtest.h>
+#include <iostream>
+#include <memory>
+
+using namespace Tipousi;
+using namespace Graph;
+using namespace Layer;
+using namespace Activation;
+using namespace Loss;
+using namespace Data;
+using namespace Optimizer;
+
+TEST(SimpleNetTest, XORTest)
+{
+    // in this test we try to train a net to learn the xor operation
+    // we have two inputs and one output
+    int n_features{2};
+    int n_labels{1};
+
+    Node *node1 = Node::create<Dense>(n_features, 32);
+    Node *node2 = Node::create<ReLU>();
+    Node *node3 = Node::create<Dense>(32, n_labels);
+    Node *node4 = Node::create<Softmax>();
+
+    // build the dependencies
+    node2->add_input(node1); // node2 depends on node1
+    node3->add_input(node2); // node3 depends on node2
+    node4->add_input(node3); // node4 depends on node3
+
+    // create the graph (pass input and output nodes)
+    float learning_rate{0.001f};
+    Sequential net(node1, node4, learning_rate);
+
+    // test inference
+    Eigen::MatrixXf X(4, 2);
+    Eigen::MatrixXf Y(4, 1);
+    // XOR inputs
+    X << 0, 0, 0, 1, 1, 0, 1, 1;
+    // XOR outputs (labels)
+    Y << 0, 1, 1, 0;
+
+    Eigen::MatrixXf preds;
+    net.forward(X, preds);
+
+    // create dataset
+    // TODO add epochs etc
+    Dataset dataset(X, Y);
+
+    // define the optimizer and the loss
+    SGD sgd;
+    MSE mse;
+    net.train(dataset, sgd, mse, 10);
+
+    // forward pass with time measurement
+    // Eigen::MatrixXf preds;
+    auto start = std::chrono::high_resolution_clock::now();
+    net.forward(X, preds);
+    auto end = std::chrono::high_resolution_clock::now();
+    auto duration =
+        std::chrono::duration_cast<std::chrono::microseconds>(end - start)
+            .count();
+    std::cout << "Forward pass execution time: " << duration << " microseconds"
+              << std::endl;
+
+    // compute the loss
+    std::cout << "The loss of the network after training: "
+              << mse.compute(Y, preds) << std::endl;
+}
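For reference, the two MSE entry points used above (compute and grad, with the argument orders shown in sequential.cpp's train loop) correspond to standard mean squared error. A minimal sketch of the assumed semantics, as standalone helpers with hypothetical names, not the library's confirmed implementation:

    // loss = mean((Y - preds)^2); gradient w.r.t. preds = 2 * (preds - Y) / n
    float mse_compute(const Eigen::MatrixXf &Y, const Eigen::MatrixXf &preds)
    {
        return (Y - preds).array().square().mean();
    }

    void mse_grad(Eigen::MatrixXf &out_grad, const Eigen::MatrixXf &Y,
                  const Eigen::MatrixXf &preds)
    {
        out_grad = 2.0f * (preds - Y) / static_cast<float>(Y.rows());
    }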
