feat: Pass 1

shahrukhqasim · Nov 10, 2017 · 9a77d26
1 parent 175023b
commit 9a77d26
Show file tree

Hide file tree

Showing 10 changed files with 95 additions and 50 deletions.
diff --git a/python/network/ModuleA.py b/python/network/ModuleA.py
@@ -10,11 +10,13 @@ def __init__(self, D_in, D_out):
         # H1 = Variable(torch.randn(num_words, 100))
         # H2 = Variable(torch.randn(num_words, 100))
 
-        self.linear1 = torch.nn.Linear(D_in, 100)
-        self.linear2 = torch.nn.Linear(100, 100)
-        self.linear3 = torch.nn.Linear(100, D_out)
+        self.linear1 = torch.nn.Linear(D_in, 100).cuda()
+        self.linear2 = torch.nn.Linear(100, 100).cuda()
+        self.linear3 = torch.nn.Linear(100, D_out).cuda()
 
     def forward(self, x):
         o1 = self.linear1(x).clamp(min=0)
         o2 = self.linear2(o1).clamp(min=0)
-        return self.linear3(o2).clamp(min=0)
+        return self.linear3(o2).tanh()
+
+        # return self.linear1(x).tanh()
diff --git a/python/network/ModuleB.py b/python/network/ModuleB.py
@@ -6,7 +6,7 @@
 class ModuleB(torch.nn.Module):
     def __init__(self):
         super(ModuleB, self).__init__()
-        self.gru = torch.nn.GRUCell(500, 100)
+        self.gru = torch.nn.GRUCell(100, 100).cuda()
 
     def forward(self, x, hx):
         return self.gru.forward(x, hx)
diff --git a/python/network/ModuleB2.py b/python/network/ModuleB2.py
@@ -10,17 +10,17 @@ def __init__(self, D_in, D_out_1, D_out_2):
         # H1 = Variable(torch.randn(num_words, 100))
         # H2 = Variable(torch.randn(num_words, 100))
 
-        self.linear1 = torch.nn.Linear(D_in, 100)
-        self.linear2 = torch.nn.Linear(100, 100)
-        self.linear3 = torch.nn.Linear(100, D_out_1)
+        self.linear1 = torch.nn.Linear(D_in, 100).cuda()
+        self.linear2 = torch.nn.Linear(100, 100).cuda()
+        self.linear3 = torch.nn.Linear(100, D_out_1).cuda()
 
         #
         # H12 = Variable(torch.randn(num_words, 100))
         # H22 = Variable(torch.randn(num_words, 100))
 
-        self.linear12 = torch.nn.Linear(D_in, 100)
-        self.linear22 = torch.nn.Linear(100, 100)
-        self.linear32 = torch.nn.Linear(100, D_out_2)
+        self.linear12 = torch.nn.Linear(D_in, 100).cuda()
+        self.linear22 = torch.nn.Linear(100, 100).cuda()
+        self.linear32 = torch.nn.Linear(100, D_out_2).cuda()
 
     def forward(self, x):
         o1 = self.linear1(x).clamp(min=0)
@@ -29,4 +29,6 @@ def forward(self, x):
         o12 = self.linear12(x).clamp(min=0)
         o22 = self.linear22(o12).clamp(min=0)
 
-        return self.linear3(o2).clamp(min=0), self.linear32(o22).clamp(min=0)
+        return self.linear3(o2).tanh(), self.linear32(o22).tanh()
+
+        # return self.linear1(x).tanh(), self.linear12(x).tanh()
diff --git a/python/network/ModuleC.py b/python/network/ModuleC.py
@@ -10,11 +10,13 @@ def __init__(self, D_in, D_out):
         # H1 = Variable(torch.randn(num_words, 100))
         # H2 = Variable(torch.randn(num_words, 100))
 
-        self.linear1 = torch.nn.Linear(D_in, 100)
-        self.linear2 = torch.nn.Linear(100, 100)
-        self.linear3 = torch.nn.Linear(100, D_out)
+        self.linear1 = torch.nn.Linear(D_in, 100).cuda()
+        self.linear2 = torch.nn.Linear(100, 100).cuda()
+        self.linear3 = torch.nn.Linear(100, D_out).cuda()
 
     def forward(self, x):
         o1 = self.linear1(x).clamp(min=0)
         o2 = self.linear2(o1).clamp(min=0)
-        return self.linear3(o2).clamp(min=0)
+        return self.linear3(o2)
+
+        # return self.linear3(x)
diff --git a/python/network/ModuleCollect.py b/python/network/ModuleCollect.py
@@ -10,7 +10,7 @@ def __init__(self, D_in, num_words):
         self.num_words = num_words
 
     def forward(self, x, indices):
-        y = Variable(torch.zeros(self.num_words, self.D_in * 5))
+        y = Variable(torch.zeros(self.num_words, self.D_in * 5)).cuda()
 
         for i in range(self.num_words):
             y[i, 0:self.D_in] = x[i]

diff --git a/python/network/ModuleD.py b/python/network/ModuleD.py
@@ -7,11 +7,13 @@ class ModuleD(torch.nn.Module):
     def __init__(self, D_in, D_out):
         super(ModuleD, self).__init__()
 
-        self.linear1 = torch.nn.Linear(D_in, 100)
-        self.linear2 = torch.nn.Linear(100, 100)
-        self.linear3 = torch.nn.Linear(100, D_out)
+        self.linear1 = torch.nn.Linear(D_in, 100).cuda()
+        self.linear2 = torch.nn.Linear(100, 100).cuda()
+        self.linear3 = torch.nn.Linear(100, D_out).cuda()
 
     def forward(self, x):
         o1 = self.linear1(x).clamp(min=0)
         o2 = self.linear2(o1).clamp(min=0)
-        return self.linear3(o2).clamp(min=0)
+        return self.linear3(o2).tanh()
+
+        # return self.linear3(x).tanh()
diff --git a/python/network/computation_graph.py b/python/network/computation_graph.py
@@ -12,7 +12,7 @@
 class SimpleDocProcModel(torch.nn.Module):
     def __init__(self):
         super(SimpleDocProcModel, self).__init__()
-        self.k = 10
+        self.k = 8
         self.D_in = 300 + self.k
 
         self.A = ModuleA(self.D_in, 100)
@@ -26,27 +26,27 @@ def __init__(self):
     def set_iterations(self, iterations):
         self.iterations = iterations
 
-    def concat(self, x, indices, num_words):
-        y = Variable(torch.zeros(num_words, self.D_in * 5))
+    def concat(self, x, indices, indices_not_found, num_words):
+        y = Variable(torch.zeros(num_words, 100 * 5)).cuda()
+        y[:, 000:100] = x#[indices[:, 0]]
+        # y[:, 100:200] = x[indices[:, 1]]
+        # y[:, 200:300] = x[indices[:, 2]]
+        # y[:, 300:400] = x[indices[:, 3]]
+        # y[:, 400:500] = x[indices[:, 4]]
 
-        for i in range(num_words):
-            y[i, 0:self.D_in] = x[i]
-            y[i, self.D_in * 1:self.D_in * 2] = x[np.maximum(indices[i, 0], 0)] * int(indices[i, 0] != -1)
-            y[i, self.D_in * 2:self.D_in * 3] = x[np.maximum(indices[i, 1], 0)] * int(indices[i, 1] != -1)
-            y[i, self.D_in * 3:self.D_in * 4] = x[np.maximum(indices[i, 2], 0)] * int(indices[i, 2] != -1)
-            y[i, self.D_in * 4:self.D_in * 5] = x[np.maximum(indices[i, 3], 0)] * int(indices[i, 3] != -1)
+        # y[indices_not_found] = 0
 
         return y
 
-    def forward(self, indices, vv, num_words):
+    def forward(self, indices, indices_not_found, vv, num_words):
         uu = self.A.forward(vv)
-        hh = Variable(torch.zeros(num_words,100))
-
-        for i in range(self.iterations):
-            ww = self.concat(uu, indices)
-            bb = self.B.forward(ww, hh, num_words)
-            oo, hh = self.B2.forward(bb)
-            ll = self.C.forward(oo)
-            uu = self.D.forward(hh)
+        hh = Variable(torch.zeros(num_words,100)).cuda()
+
+        # for i in range(self.iterations):
+        #     # ww = self.concat(uu, indices, indices_not_found, num_words)
+        #     bb = self.B.forward(uu, hh)
+        #     oo, hh = self.B2.forward(bb)
+        #     ll = self.C.forward(oo)
+        #     uu = self.D.forward(hh)
 
         return ll
diff --git a/python/network/data_features_dumper.py b/python/network/data_features_dumper.py
@@ -27,14 +27,14 @@ def __init__(self, path, glove_path, cache_name):
     def dump_doc(self, all_tokens, all_tokens_rects, image, file_name):
         N = len(all_tokens)
         height, width = np.shape(image)
-        class_one_hot = np.zeros((N, 2))
+        classes = np.zeros(N)
         rect_matrix = np.zeros((N, 4))
         embeddings_matrix = np.zeros((N, 300))
         for i in range(N):
             token_rect = all_tokens_rects[i]
             index = 0 if image[int(token_rect['y'] + token_rect['height'] / 2), int(
                 token_rect['x'] + token_rect['width'] / 2)] == 0 else 1
-            class_one_hot[i, index] = 1
+            classes[i] = index
             rect_matrix[i, 0] = token_rect['x'] / width
             rect_matrix[i, 1] = token_rect['y'] / height
             rect_matrix[i, 2] = token_rect['width'] / width
@@ -51,7 +51,7 @@ def dump_doc(self, all_tokens, all_tokens_rects, image, file_name):
         neighbor_distance_matrix[:, 1] = neighbor_distance_matrix[:, 1] / height
         neighbor_distance_matrix[:, 2] = neighbor_distance_matrix[:, 2] / width
         neighbor_distance_matrix[:, 3] = neighbor_distance_matrix[:, 3] / height
-        document = DocumentFeatures(embeddings_matrix, rect_matrix, neighbor_distance_matrix, neighbor_graph, class_one_hot)
+        document = DocumentFeatures(embeddings_matrix, rect_matrix, neighbor_distance_matrix, neighbor_graph, classes)
         with open(file_name, 'wb') as f:
             pickle.dump(document, f, pickle.HIGHEST_PROTOCOL)
 

diff --git a/python/network/test_all.py b/python/network/test_all.py
@@ -18,10 +18,10 @@ def setUp(self):
     def test_model_1(self):
         N = 500
         indices_test = (np.random.randint(0,N, (N,4))).astype(np.int32)
-        vv = Variable(torch.randn(N, 330))
+        vv = Variable(torch.randn(N, 308))
         model = SimpleDocProcModel()
         model.set_iterations(20)
-        ll = model.forward(indices_test, vv)
+        ll = model.forward(indices_test, vv, N)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/network/trainer.py b/python/network/trainer.py
@@ -4,10 +4,11 @@
 from network.silknet import LoadInterface
 from network.silknet.FolderDataReader import FolderDataReader
 from interface import implements
-import cv2
 import os
 import pickle
 from network.computation_graph import SimpleDocProcModel
+import torch
+from torch.autograd import Variable
 
 
 class DataLoader(implements(LoadInterface)):
@@ -24,6 +25,7 @@ def __init__(self):
         self.train_path = config['quad']['train_data_path']
         self.test_path = config['quad']['test_data_path']
         self.glove_path = config['quad']['glove_path']
+        self.learning_rate = float(config['quad']['learning_rate'])
 
     def init(self, dump_features_again):
         if dump_features_again:
@@ -35,9 +37,44 @@ def train(self):
         dataset = FolderDataReader(self.train_path, DataLoader())
         dataset.init()
         model = SimpleDocProcModel()
-        for i in range(300):
+        model.set_iterations(1)
+        criterion = torch.nn.CrossEntropyLoss(size_average=True)
+        optimizer = torch.optim.Adam(model.parameters(), lr=self.learning_rate)
+        for i in range(10000):
             document, epoch, id = dataset.next_element()
-            num_words, _ = np.shape(document.tokens_rects)
-            for j in range(300):
-                vv = np.concatenate([document.rects, document.distances, document.embeddings], axis=1)
-                model.forward(document.tokens_neighbor_matrix, vv, num_words)
+            num_words, _ = np.shape(document.rects)
+            vv = np.concatenate([document.rects, document.distances, document.embeddings * 0], axis=1).astype(np.float32)
+            vv = Variable(torch.from_numpy(vv)).cuda()
+            y = Variable(torch.from_numpy(document.classes.astype(np.int64)), requires_grad=False).cuda()
+
+            baseline_accuracy_1 = 100 * np.sum(document.classes==0) / num_words
+            baseline_accuracy_2 = 100 * np.sum(document.classes==1) / num_words
+
+            indices = torch.LongTensor(torch.from_numpy(np.concatenate(
+                [np.expand_dims(np.arange(num_words, dtype=np.int64), axis=1),
+                 np.maximum(document.neighbor_graph.astype(np.int64), 0)], axis=1))).cuda()
+            indices_not_found = torch.ByteTensor(torch.from_numpy(np.repeat(np.concatenate(
+                [np.expand_dims(np.zeros(num_words, dtype=np.int64), axis=1),
+                 document.neighbor_graph.astype(np.int64)], axis=1) == -1, 100).reshape((-1, 500)).astype(
+                np.uint8))).cuda()
+            indices_not_found = indices_not_found*0
+
+            for j in range(1):
+                y_pred = model(indices, indices_not_found, vv, num_words)
+                _, predicted = torch.max(y_pred.data, 1)
+                accuracy = torch.sum(predicted == y.data)
+                accuracy = 100 * accuracy / num_words
+
+                tables_pred = torch.sum(predicted == 0)
+                tables_pred = 100 * tables_pred / num_words
+
+                non_tables_pred = torch.sum(predicted == 1)
+                non_tables_pred = 100 * non_tables_pred / num_words
+
+
+                loss = criterion(y_pred, y)
+
+                optimizer.zero_grad()
+                loss.backward()
+                optimizer.step()
+                print("%3dx%3d Loss = %f" %  (i, j, loss.data[0]), "Accuracy: %03.2f" % accuracy, "Tables: %03.2f" % tables_pred, "Non-tables: %03.2f" % non_tables_pred, "Base 1: %03.2f" % baseline_accuracy_1,"Base 2: %03.2f" % baseline_accuracy_2, torch.sum(y_pred).data[0])