You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
hi @NielsRogge@bsmock , i have use paddleocr instead of easyocr in table transfomer nd i have an issue can u resolve it !!!
# load paddleOCR
ocr = PaddleOCR(use_angle_cls=True, lang='en')
def apply_ocr(cell_coordinates, cropped_table):
# let's OCR row by row
data = dict()
max_num_columns = 0
for idx, row in enumerate(cell_coordinates):
row_text = []
for cell in row["cells"]:
# crop cell out of image
cell_image = np.array(cropped_table.crop(cell["cell"]))
# apply OCR
result = ocr.ocr(np.array(cell_image), cls=True)
if len(result) > 0:
text = " ".join([x[1] for x in result])
row_text.append(text)
if len(row_text) > max_num_columns:
max_num_columns = len(row_text)
data[str(idx)] = row_text
# pad rows which don't have max_num_columns elements
# to make sure all rows have the same number of columns
for idx, row_data in data.copy().items():
if len(row_data) != max_num_columns:
row_data = row_data + ["" for _ in range(max_num_columns - len(row_data))]
data[str(idx)] = row_data
# write to csv
with open('output.csv', 'w') as result_file:
wr = csv.writer(result_file, dialect='excel')
for row, row_text in data.items():
wr.writerow(row_text)
# return as Pandas dataframe
df = pd.read_csv('output.csv')
return df, data
The error message i got is..
text = " ".join([x[1] for x in result])
IndexError: list index out of range
The text was updated successfully, but these errors were encountered:
hi @NielsRogge @bsmock , i have use paddleocr instead of easyocr in table transfomer nd i have an issue can u resolve it !!!
The error message i got is..
The text was updated successfully, but these errors were encountered: