Skip to content

Commit

Permalink
MOSTLY IGNORE
Browse files (browse the repository at this point in the history)
  • Loading branch information
shahrukhqasim committed Nov 20, 2017
1 parent a99ab06 commit e67ad0e
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 7 deletions.
4 changes: 2 additions & 2 deletions python/tools/convert_dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -44,13 +44,13 @@ def see_table(self, table, increment):
table_json['table']['x2'] = x2
table_json['table']['y2'] = y2

cv2.rectangle(self.table_segment, (int(x1),int(y1)), (int(x2), int(y2)), 255, cv2.FILLED)
cv2.rectangle(self.table_segment, (int(x1),int(y1)), (int(x2), int(y2)), increment, cv2.FILLED)

def see_doc(self):
tree = ET.parse(self.xml_path)
root = tree.getroot()
tables = root.find('Tables')
i = 0
i = 1
for table in tables:
self.see_table(table, i)
i += 1
Expand Down
79 changes: 74 additions & 5 deletions python/tools/prepare_dataset_for_table_parsing.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,9 +10,9 @@
import pickle
from network.table_data import TableData

show = True
show = False
show_ocr = False
dont_output = True
dont_output = False

images_path = '/home/srq/Datasets/tables/unlv'
tables_gt_path = '/home/srq/Datasets/tables/unlv/unlv_xml_gt'
Expand Down Expand Up @@ -137,9 +137,78 @@ def see_table(self, table, increment):
col_share_matrix = np.zeros((N, N))
cell_share_matrix = np.zeros((N, N))

neighbors_same_row = np.zeros((N,4))
neighbors_same_col = np.zeros((N,4))
neighbors_same_cell = np.zeros((N,4))

graph_builder = NeighborGraphBuilder(all_tokens_rects, data_image[:,:,0])
M, D = graph_builder.get_neighbor_matrix()

for i in range(N):
left_index = int(M[i,0])
top_index = int(M[i,1])
right_index = int(M[i,2])
bottom_index = int(M[i,3])

token_rect = all_tokens_rects[i]
mid = [int(token_rect['x'] + token_rect['width'] / 2), int(token_rect['y'] + token_rect['height'] / 2)]

if left_index != -1:
token_rect_2 = all_tokens_rects[left_index]
mid_2 = [int(token_rect_2['x'] + token_rect_2['width'] / 2),
int(token_rect_2['y'] + token_rect_2['height'] / 2)]
# They share row
if data_image[mid[1], mid[0], 0] == data_image[mid_2[1], mid_2[0], 0]:
neighbors_same_row[i, 0] = 1
# They share column
if data_image[mid[1], mid[0], 1] == data_image[mid_2[1], mid_2[0], 1]:
neighbors_same_col[i, 0] = 1
# They share cell
if data_image[mid[1], mid[0], 2] == data_image[mid_2[1], mid_2[0], 2]:
neighbors_same_cell[i, 0] = 1

if top_index != -1:
token_rect_2 = all_tokens_rects[top_index]
mid_2 = [int(token_rect_2['x'] + token_rect_2['width'] / 2),
int(token_rect_2['y'] + token_rect_2['height'] / 2)]
# They share row
if data_image[mid[1], mid[0], 0] == data_image[mid_2[1], mid_2[0], 0]:
neighbors_same_row[i, 1] = 1
# They share column
if data_image[mid[1], mid[0], 1] == data_image[mid_2[1], mid_2[0], 1]:
neighbors_same_col[i, 1] = 1
# They share cell
if data_image[mid[1], mid[0], 2] == data_image[mid_2[1], mid_2[0], 2]:
neighbors_same_cell[i, 1] = 1

if right_index != -1:
token_rect_2 = all_tokens_rects[right_index]
mid_2 = [int(token_rect_2['x'] + token_rect_2['width'] / 2),
int(token_rect_2['y'] + token_rect_2['height'] / 2)]
# They share row
if data_image[mid[1], mid[0], 0] == data_image[mid_2[1], mid_2[0], 0]:
neighbors_same_row[i, 2] = 1
# They share column
if data_image[mid[1], mid[0], 1] == data_image[mid_2[1], mid_2[0], 1]:
neighbors_same_col[i, 2] = 1
# They share cell
if data_image[mid[1], mid[0], 2] == data_image[mid_2[1], mid_2[0], 2]:
neighbors_same_cell[i, 2] = 1

if bottom_index != -1:
token_rect_2 = all_tokens_rects[bottom_index]
mid_2 = [int(token_rect_2['x'] + token_rect_2['width'] / 2),
int(token_rect_2['y'] + token_rect_2['height'] / 2)]
# They share row
if data_image[mid[1], mid[0], 0] == data_image[mid_2[1], mid_2[0], 0]:
neighbors_same_row[i, 3] = 1
# They share column
if data_image[mid[1], mid[0], 1] == data_image[mid_2[1], mid_2[0], 1]:
neighbors_same_col[i, 3] = 1
# They share cell
if data_image[mid[1], mid[0], 2] == data_image[mid_2[1], mid_2[0], 2]:
neighbors_same_cell[i, 3] = 1

for i in range(N):
token = all_tokens[i]
token_rect = all_tokens_rects[i]
Expand All @@ -160,7 +229,7 @@ def see_table(self, table, increment):
cell_share_matrix[i, j] = 1


self.dump_table(all_tokens, all_tokens_rects, M, D, row_share_matrix, col_share_matrix, cell_share_matrix, show_1, os.path.join(sorted_path_full, '__dump__.pickle'))
self.dump_table(all_tokens, all_tokens_rects, M, D, row_share_matrix, col_share_matrix, cell_share_matrix, neighbors_same_row, neighbors_same_col, neighbors_same_cell, show_1, os.path.join(sorted_path_full, '__dump__.pickle'))
cv2.imwrite(os.path.join(sorted_path_full, 'visual.png'), show_1)

def do_plot(self, document, id):
Expand All @@ -175,7 +244,7 @@ def do_plot(self, document, id):
cv2.waitKey(0)

def dump_table(self, all_tokens, all_tokens_rects, neighbor_graph, neighbor_distance_matrix, share_row_matrix,
share_col_matrix, share_cell_matrix, image_visual, file_name):
share_col_matrix, share_cell_matrix, neighbors_same_row, neighbors_same_col, neighbors_same_cell, image_visual, file_name):
N = len(all_tokens)
height, width, _ = np.shape(image_visual)
classes = np.zeros(N)
Expand All @@ -192,7 +261,7 @@ def dump_table(self, all_tokens, all_tokens_rects, neighbor_graph, neighbor_dist
embedding = np.ones((300)) * (-1)
embeddings_matrix[i] = embedding

document = TableData(embeddings_matrix, rect_matrix, neighbor_distance_matrix, neighbor_graph, share_row_matrix, share_col_matrix, share_cell_matrix)
document = TableData(embeddings_matrix, rect_matrix, neighbor_distance_matrix, neighbor_graph, share_row_matrix, share_col_matrix, share_cell_matrix, neighbors_same_row, neighbors_same_col, neighbors_same_cell)

if show:
self.do_plot(document, file_name)
Expand Down

0 comments on commit e67ad0e

Please sign in to comment.