feat: Add script to prepare cell share dataset

Also some fixes
shahrukhqasim · Dec 5, 2017 · 1f905a3 · 1f905a3
1 parent 896f448
commit 1f905a3
Show file tree

Hide file tree

Showing 4 changed files with 609 additions and 34 deletions.
diff --git a/python/table_parse_2d/document_for_table_parse.py b/python/table_parse_2d/document_for_table_parse.py
@@ -0,0 +1,7 @@
+
+class TableParseDocument:  # Plain container bundling the four arrays of one document; no validation is done.
+    def __init__(self, input_tensor, classes_tensor, word_mask, zone_mask):  # Stores each argument unchanged.
+        self.input_tensor = input_tensor # noted by the author as 256x256x308; shape is not checked here - TODO confirm
+        self.classes_tensor = classes_tensor # noted by the author as 256x256x4; shape is not checked here - TODO confirm
+        self.word_mask = word_mask # noted by the author as 256x256; shape is not checked here - TODO confirm
+        self.zone_mask = zone_mask # noted by the author as 256x256; shape is not checked here - TODO confirm
diff --git a/python/table_parse_2d/parser_2d.py b/python/table_parse_2d/parser_2d.py
@@ -9,6 +9,7 @@
 from tensorflow.contrib.ndlstm.python import lstm2d as lstm2d_lib
 import numpy as np
 import cv2
+import sys
 
 slim = tf.contrib.slim
 
@@ -213,6 +214,12 @@ def train(self):
 
 
 if __name__ == '__main__':
+    if len(sys.argv) != 2:
+        print("Error in arguments")
+        print("Usage: python table_parse_2d/parser_2d.py [config_section_name]")
+        print("\tFor example, if the section name in the config file is `zone_segment` you can issue:\n"
+              "\tpython table_parse_2d/parser_2d.py zone_segment")
+        sys.exit(-1)
     parser = Parser2d()
     parser.construct_graphs()
     parser.train()
diff --git a/python/table_parse_2d/prepare_marg.py b/python/table_parse_2d/prepare_marg.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 import json
 import xml.etree.ElementTree as ET
 import cv2
@@ -13,23 +14,28 @@
 import math
 from table_parse_2d.document_for_zone_segment import ZoneSegmentDocument
 import gzip
+from PIL import Image
+import matplotlib.image
+from numpy import zeros, newaxis
+import configparser as cp
 
 show = False
 show_ocr = False
 dont_output = False
 
-input_path = '/home/srq/Datasets/fmarg/combined'
-
-test_division_txt = '/home/srq/Datasets/fmarg/division/test.txt'
-train_division_txt = '/home/srq/Datasets/fmarg/division/train.txt'
-validate_division_txt = '/home/srq/Datasets/fmarg/division/validate.txt'
 
-test_out = '/home/srq/Datasets/fmarg/marg-for-div/test'
-train_out = '/home/srq/Datasets/fmarg/marg-for-div/train'
-validate_out = '/home/srq/Datasets/fmarg/marg-for-div/validate'
+config = cp.ConfigParser()
+config.read('config.ini')
 
-glove_path = '/media/srq/Seagate Expansion Drive1/Models/GloVe/glove.840B.300d.txt'
-cache_name = 'marg_complete'
+input_path=config['dataset_prepare_marg']['input_path']
+test_division_txt=config['dataset_prepare_marg']['test_division_txt']
+train_division_txt=config['dataset_prepare_marg']['train_division_txt']
+validate_division_txt=config['dataset_prepare_marg']['validate_division_txt']
+test_out=config['dataset_prepare_marg']['test_out']
+train_out=config['dataset_prepare_marg']['train_out']
+validate_out=config['dataset_prepare_marg']['validate_out']
+glove_path=config['dataset_prepare_marg']['glove_path']
+cache_name=config['dataset_prepare_marg']['cache_name']
 
 
 # Pick up train/test/validate split
@@ -65,23 +71,23 @@ def execute_zone(self, zone, zone_id):
         x2 = int(vertices[2].attrib['x'])
         y2 = int(vertices[2].attrib['y'])
 
-        self.zone_segmentation[y1:y2-1, x1:x2-1] = zone_id
+        self.zone_segmentation[max(0, y1 - 1):(y2), max(0, x1 - 1):(x2)] = zone_id
 
     def execute_tokens(self):
         # To get local neighbors of each token: Left, right, top, bottom
-        graph_builder = NeighborGraphBuilder(self.all_tokens_rects, self.image[:,:,0])
+        graph_builder = NeighborGraphBuilder(self.all_tokens_rects, self.image[:, :, 0])
         # M is the indices graph and D is distance matrix
         M, D = graph_builder.get_neighbor_matrix()
 
         N = len(self.all_tokens)
 
-        neighbors_same_zone = np.zeros((N,4))
+        neighbors_same_zone = np.zeros((N, 4))
 
         for i in range(N):
-            left_index = int(M[i,0])
-            top_index = int(M[i,1])
-            right_index = int(M[i,2])
-            bottom_index = int(M[i,3])
+            left_index = int(M[i, 0])
+            top_index = int(M[i, 1])
+            right_index = int(M[i, 2])
+            bottom_index = int(M[i, 3])
 
             token_rect = self.all_tokens_rects[i]
             mid = [int(token_rect['x'] + token_rect['width'] / 2), int(token_rect['y'] + token_rect['height'] / 2)]
@@ -93,6 +99,10 @@ def execute_tokens(self):
                 # They share zone
                 if self.zone_segmentation[mid[1], mid[0]] == self.zone_segmentation[mid_2[1], mid_2[0]]:
                     neighbors_same_zone[i, 0] = 1
+                else:
+                    neighbors_same_zone[i, 0] = 110
+            else:
+                neighbors_same_zone[i, 0] = 1
 
             if top_index != -1:
                 token_rect_2 = self.all_tokens_rects[top_index]
@@ -101,6 +111,10 @@ def execute_tokens(self):
                 # They share zone
                 if self.zone_segmentation[mid[1], mid[0]] == self.zone_segmentation[mid_2[1], mid_2[0]]:
                     neighbors_same_zone[i, 1] = 1
+                else:
+                    neighbors_same_zone[i, 1] = 110  # top neighbor lies in a different zone
+            else:
+                neighbors_same_zone[i, 1] = 1  # no top neighbor: set column 1 (top), not column 0 (left)
 
             if right_index != -1:
                 token_rect_2 = self.all_tokens_rects[right_index]
@@ -109,6 +123,10 @@ def execute_tokens(self):
                 # They share zone
                 if self.zone_segmentation[mid[1], mid[0]] == self.zone_segmentation[mid_2[1], mid_2[0]]:
                     neighbors_same_zone[i, 2] = 1
+                else:
+                    neighbors_same_zone[i, 2] = 110  # right neighbor lies in a different zone (column 2, not 1)
+            else:
+                neighbors_same_zone[i, 2] = 1  # no right neighbor: set column 2 (right), not column 0 (left)
 
             if bottom_index != -1:
                 token_rect_2 = self.all_tokens_rects[bottom_index]
@@ -117,23 +135,36 @@ def execute_tokens(self):
                 # They share zone
                 if self.zone_segmentation[mid[1], mid[0]] == self.zone_segmentation[mid_2[1], mid_2[0]]:
                     neighbors_same_zone[i, 3] = 1
+                else:
+                    neighbors_same_zone[i, 3] = 110  # bottom neighbor lies in a different zone (column 3, not 1)
+
+            else:
+                neighbors_same_zone[i, 3] = 1  # no bottom neighbor: set column 3 (bottom), not column 0 (left)
 
         # To place input vectors at respective spatial coordinates
         input_tensor = np.zeros((256, 256, 308)).astype(np.float64)
         # Same zone or not, 0 for not, 1 for yes
         output_tensor = np.zeros((256, 256, 4)).astype(np.float64)
         # Whether there was a word here or not
+        # output_tensor_word_mask = np.zeros((256, 256)).astype(np.float64)
         output_tensor_word_mask = np.zeros((256, 256)).astype(np.float64)
+
         # Whether there was a zone here or not
         self.zone_segmentation[self.zone_segmentation != 0] = 1
-        output_tensor_zone_mask = cv2.resize(self.zone_segmentation, (256,256))
+
+        output_tensor_zone_mask = cv2.resize(self.zone_segmentation, (256, 256))
+        # output_tensor_zone_mask = output_tensor_zone_mask_temp.reshape(-1,3)
         for i in range(N):
             token_rect = self.all_tokens_rects[i]
+            #            mid = [int(token_rect['x'] + token_rect['width'] / 2), int(token_rect['y'] + token_rect['height'] / 2)]
             # Source coordinates of top left of tokens
             cx = token_rect['x']
             cy = token_rect['y']
             cw = token_rect['width']
             ch = token_rect['height']
+            #            token_rect_2 = self.all_tokens_rects[top_index]
+            #            mid_2 = [int(token_rect_2['x'] + token_rect_2['width'] / 2),
+            #                         int(token_rect_2['y'] + token_rect_2['height'] / 2)]
 
 
             distances_vector = D[i]
@@ -153,20 +184,48 @@ def execute_tokens(self):
             input_tensor[ny, nx] = np.concatenate((embedding, positional))
 
             # From the neighbor graph
-            output_tensor[ny, nx] = np.array([neighbors_same_zone[i, 0], neighbors_same_zone[i, 1], neighbors_same_zone[i, 2],
-                                              neighbors_same_zone[i, 3]])
-            # Set mask to 1
-            output_tensor_word_mask[ny, nx] = 1
+            output_tensor[ny, nx] = np.array(
+                [neighbors_same_zone[i, 0], neighbors_same_zone[i, 1], neighbors_same_zone[i, 2],
+                 neighbors_same_zone[i, 3]])
+
+            if any(x == 110 for x in output_tensor[ny, nx]):
+                output_tensor_word_mask[ny, nx] = 110
+            else:
+                output_tensor_word_mask[ny, nx] = 1
+                # Set mask to 1
+                # output_tensor_word_mask[ny, nx] =1
+                # print (output_tensor_word_mask[ny, nx])
 
         print(self.sorted_path)
+        rgb = np.zeros((256, 256, 3))
+        for i in range(output_tensor_word_mask.shape[0]):
+            for j in range(output_tensor_word_mask.shape[1]):
+                if output_tensor_word_mask[i, j] == 1.0:
+                    rgb[i, j, 0] = 255
+                    rgb[i, j, 1] = 255
+                    rgb[i, j, 2] = 255
+                elif output_tensor_word_mask[i, j] == 110.0:
+                    rgb[i, j, 0] = 255
+                    rgb[i, j, 1] = 0
+                    rgb[i, j, 2] = 0
 
         # Output debugging visual file for zone mask
         segmentation_visualize_path = os.path.join(self.sorted_path, 'visual_segment.png')
-        cv2.imwrite(segmentation_visualize_path, (output_tensor_zone_mask*255).astype(np.uint8))
+        cv2.imwrite(segmentation_visualize_path, (output_tensor_zone_mask * 255).astype(np.uint8))
 
         # Output debugging visual image for word mask
         word_mask_path = os.path.join(self.sorted_path, 'visual_word_mask.png')
-        cv2.imwrite(word_mask_path, (output_tensor_word_mask * 255).astype(np.uint8))
+        output_tensor_word_mask_temp = (rgb.transpose((2, 0, 1)) * output_tensor_zone_mask).transpose(1, 2, 0)
+        # output_tensor_word_mask_temp=rgb*np.repeat(output_tensor_zone_mask,3).reshape((256,256,3))
+        print(output_tensor_word_mask_temp.shape)
+
+        # output_tensor_zone_mask_temp  = np.resize(output_tensor_zone_mask, (256, 256, 3))
+
+        # output_tensor_word_mask=np.multiply(rgb,output_tensor_zone_mask_temp )
+        matplotlib.image.imsave(word_mask_path, rgb.astype(np.uint8))
+        word_mask_path_1 = os.path.join(self.sorted_path, 'visual_word_mask_masked.png')
+        matplotlib.image.imsave(word_mask_path_1, output_tensor_word_mask_temp.astype(np.uint8))
+        # cv2.imwrite(word_mask_path, (output_tensor_word_mask *255).astype(np.uint8))
 
         # Dump the content to pickle file. The file is compressed by gzip.
         dump_path = os.path.join(self.sorted_path, '__dump__.pklz')
@@ -211,9 +270,6 @@ def see_words(self):
                 token_rect['width'] = int(divided_width)
                 self.all_tokens_rects.append(token_rect)
 
-
-
-
     @staticmethod
     def pick_up_words(json_path, image_path):
         image = cv2.imread(image_path, 0)
@@ -227,8 +283,9 @@ def pick_up_words(json_path, image_path):
         for i in range(len(ocr_data)):
 
             word_data = ocr_data[i]
-            x, y, width, height, word = int(word_data['rect']['x']), int(word_data['rect']['y']), int(word_data['rect']['width']), \
-                                   int(word_data['rect']['height']), word_data['word']
+            x, y, width, height, word = int(word_data['rect']['x']), int(word_data['rect']['y']), int(
+                word_data['rect']['width']), \
+                                        int(word_data['rect']['height']), word_data['word']
 
             word_data_2 = {'rect': {'x': x, 'y': y, 'width': width, 'height': height}, 'word': word}
 
@@ -241,7 +298,6 @@ def pick_up_words(json_path, image_path):
         return ocr_data_2, nlp_tokens_all
 
 
-
 # print("Loading dictionary")
 nlp_tokens = set()
 # i = 0
@@ -264,7 +320,6 @@ def pick_up_words(json_path, image_path):
 glove_reader = GLoVe(glove_path, nlp_tokens)
 glove_reader.load(cache_name)
 
-
 last_id = 1
 
 for parent_path in os.listdir(input_path):
@@ -283,9 +338,10 @@ def pick_up_words(json_path, image_path):
         if not sub_file.endswith('.png'):
             continue
         id = os.path.splitext(sub_file)[0]
-        png_path = os.path.join(parent_path_full, id+'.png')
-        xml_path = os.path.join(parent_path_full, id+'.xml')
-        json_path = os.path.join(parent_path_full, id+'.json')
+        png_path = os.path.join(parent_path_full, id + '.png')
+        print(png_path)
+        xml_path = os.path.join(parent_path_full, id + '.xml')
+        json_path = os.path.join(parent_path_full, id + '.json')
         sorted_path = os.path.join(out_path, str(last_id))
         if not os.path.exists(sorted_path):
             os.mkdir(sorted_path)