We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
def run_surya_table_detection(
    PDF_PATH,
    det_model,
    det_processor,
    layout_model,
    layout_processor,
    table_rec_model,
    table_rec_processor,
    rec_model,
    rec_processor,
):
    """Extract tables from a PDF with surya and log how long it took.

    The eight model/processor arguments are forwarded to ``extract_tables``
    as two 4-tuples, in the order that helper is called with here:
    ``(det_model, det_processor, layout_model, layout_processor)`` and
    ``(table_rec_model, table_rec_processor, rec_model, rec_processor)``.

    Args:
        PDF_PATH: Path handed to ``load_pdfs_images``.
        det_model, det_processor: Text-detection model and its processor.
        layout_model, layout_processor: Layout model and its processor.
        table_rec_model, table_rec_processor: Table-recognition model and
            its processor.
        rec_model, rec_processor: Text-recognition model and its processor.

    Returns:
        Whatever ``extract_tables`` returns for the loaded pages.
    """
    det_models = (det_model, det_processor, layout_model, layout_processor)
    rec_models = (table_rec_model, table_rec_processor, rec_model, rec_processor)

    start_time = time.time()
    # The loader also returns the document names; they are not needed here.
    images, highres_images, _names, text_lines = load_pdfs_images(PDF_PATH)
    table_detection = extract_tables(
        images, highres_images, text_lines, det_models, rec_models
    )
    elapsed_time = time.time() - start_time
    logger.info(f"surya_table_detection耗时: {elapsed_time:.2f}秒")
    return table_detection


if __name__ == "__main__":
    # NOTE(review): the "Cannot close object, library is destroyed" messages
    # printed at interpreter exit are reported to come from pypdfium2 and to
    # be a harmless warning; importing pypdfium2 before any other module is
    # the suggested way to silence them.
    PDF_PATH = "no_git_oic/页面提取自-NPD2317设计开发记录.pdf"

    # Checkpoint locations come from the environment.
    det_model_path = os.getenv("SURYA_DET3_MODEL_PATH")
    rec_model_path = os.getenv("SURYA_REC2_MODEL_PATH")
    layout_model_path = os.getenv("SURYA_LAYOUT4_MODEL_PATH")
    order_model_path = os.getenv("SURYA_ORDER_MODEL_PATH")
    table_rec_model_path = os.getenv("SURYA_TABLEREC_MODEL_PATH")

    start_time = time.time()
    rec_processor = load_rec_processor()
    det_model = load_det_model(det_model_path)
    det_processor = load_det_processor(det_model_path)
    rec_model = load_rec_model(rec_model_path)
    # The layout checkpoint is loaded through the detection loaders.
    layout_model = load_det_model(layout_model_path)
    layout_processor = load_det_processor(layout_model_path)
    # Fixed: the model/processor loaders were previously swapped, so
    # `order_model` held a processor and `order_processor` held a model.
    # Neither is passed to table detection, but they are still loaded so the
    # timing below covers the same set of checkpoints as before.
    order_model = load_order_model(order_model_path)
    order_processor = load_order_processor(order_model_path)
    table_rec_model = load_table_rec_model(table_rec_model_path)
    table_rec_processor = load_table_rec_processor()
    elapsed_time = time.time() - start_time
    logger.info(f"surya模型加载耗时: {elapsed_time:.2f}秒")

    table_detection = run_surya_table_detection(
        PDF_PATH,
        det_model,
        det_processor,
        layout_model,
        layout_processor,
        table_rec_model,
        table_rec_processor,
        rec_model,
        rec_processor,
    )
    logger.info(f"table_detection:\n{table_detection}")
(.venv) (base) root@v100gpu-0002:/mnt/data/llch/my_lm_log# python test/ocr/test_surya.py Loaded detection model /mnt/data/llch/surya/surya_det3 on device cuda with dtype torch.float16 Loaded recognition model /mnt/data/llch/surya/surya_rec2 on device cuda with dtype torch.float16 Loaded detection model /mnt/data/llch/surya/surya_layout4 on device cuda with dtype torch.float16 Loaded reading order model /mnt/data/llch/surya/surya_order on device cuda with dtype torch.float16 Loaded recognition model /mnt/data/llch/surya/surya_tablerec on device cuda with dtype torch.float16 INFO:__main__:surya模型加载耗时: 18.78秒 Detecting bboxes: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 2.33it/s] Detecting bboxes: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1.54it/s] Detecting bboxes: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 5.48it/s] Recognizing Text: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1.26it/s] Recognizing tables: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 6.09it/s] INFO:__main__:surya_table_detection耗时: 2.47秒 INFO:__main__:table_detection: [ExtractPageResult(cells=[[SpanTableCell(bbox=[24.0, 43.0, 163.0, 69.0], text='项目名称', row_ids=[0], col_ids=[0]), SpanTableCell(bbox=[230.0, 44.0, 1097.0, 73.0], text='(G)CT41G-2220-X7R-50V-22µF-K(N)电容器', row_ids=[0], col_ids=[1]), SpanTableCell(bbox=[23.0, 104.0, 161.0, 130.0], text='预计周期', 
row_ids=[1], col_ids=[0]), SpanTableCell(bbox=[441.0, 104.0, 730.0, 132.0], text='2023 年 02月 01日', row_ids=[1], col_ids=[1]), SpanTableCell(bbox=[229.0, 105.0, 362.0, 131.0], text='起始时间', row_ids=[1], col_ids=[1]), SpanTableCell(bbox=[804.0, 105.0, 937.0, 132.0], text='终止时间', row_ids=[1], col_ids=[1]), SpanTableCell(bbox=[1016.0, 105.0, 1300.0, 132.0], text='2023年12月 30日', row_ids=[1], col_ids=[4]), SpanTableCell(bbox=[228.0, 156.0, 441.0, 181.0], text='□换先研究项目', row_ids=[2], col_ids=[1]), SpanTableCell(bbox=[424.0, 201.0, 584.0, 228.0], text='四非系列化', row_ids=[3], col_ids=[2]), SpanTableCell(bbox=[228.0, 212.0, 407.0, 236.0], text='△A. 新产品', row_ids=[2], col_ids=[1]), SpanTableCell(bbox=[624.0, 212.0, 1361.0, 237.0], text='(□A1 全新产品 □A2 改进型新产品 ☑A3 扩展型新产', row_ids=[3], col_ids=[1]), SpanTableCell(bbox=[622.0, 252.0, 1272.0, 281.0], text='品 □A4 降低成本型新产品 □A5 仿制型新产品)', row_ids=[4], col_ids=[1]), SpanTableCell(bbox=[228.0, 255.0, 363.0, 278.0], text='、新材料', row_ids=[2], col_ids=[1]), SpanTableCell(bbox=[428.0, 256.0, 583.0, 283.0], text='□系列型谱', row_ids=[4], col_ids=[2]), SpanTableCell(bbox=[24.0, 304.0, 160.0, 331.0], text='项目类型', row_ids=[2], col_ids=[0]), SpanTableCell(bbox=[227.0, 319.0, 1064.0, 345.0], text='cB. 工艺改进cC. 基础预研aD. 设备开发及工装夹具开发', row_ids=[5], col_ids=[1]), SpanTableCell(bbox=[621.0, 373.0, 927.0, 396.0], text='□政府项目 归口单位:', row_ids=[6], col_ids=[1]), SpanTableCell(bbox=[227.0, 414.0, 379.0, 440.0], text='國内部项目', row_ids=[6], col_ids=[1]), SpanTableCell(bbox=[426.0, 414.0, 577.0, 440.0], text='ロ外部项目', row_ids=[6], col_ids=[2]), SpanTableCell(bbox=[619.0, 415.0, 928.0, 440.0], text='□纵向项目 归口单位:', row_ids=[7], col_ids=[1]), SpanTableCell(bbox=[621.0, 458.0, 928.0, 484.0], text='□横向项目 合作单位:', row_ids=[8], col_ids=[1]), SpanTableCell(bbox=[20.0, 510.0, 164.0, 537.0], text='成果形式', row_ids=[9], col_ids=[0]), SpanTableCell(bbox=[227.0, 515.0, 1247.0, 544.0], text='A. 样品( )B. 产品(1) C. 专利( )D. 论文( )E. 
其它( )', row_ids=[9], col_ids=[1]), SpanTableCell(bbox=[21.0, 568.0, 166.0, 596.0], text='完成形式', row_ids=[10], col_ids=[0]), SpanTableCell(bbox=[226.0, 568.0, 493.0, 596.0], text='产品通过设计定型', row_ids=[10], col_ids=[1])]], rows_cols=[TableResult(cells=[TableCell(bbox=[24.0, 43.0, 163.0, 69.0], text='项目名称'), TableCell(bbox=[230.0, 44.0, 1097.0, 73.0], text='(G)CT41G-2220-X7R-50V-22µF-K(N)片式多层瓷介固定电容器'), TableCell(bbox=[23.0, 104.0, 161.0, 130.0], text='预计周期'), TableCell(bbox=[441.0, 104.0, 730.0, 132.0], text='2023 年 02月 01日'), TableCell(bbox=[229.0, 105.0, 362.0, 131.0], text='起始时间'), TableCell(bbox=[804.0, 105.0, 937.0, 132.0], text='终止时间'), TableCell(bbox=[1016.0, 105.0, 1300.0, 132.0], text='2023年12月 30日'), TableCell(bbox=[228.0, 156.0, 441.0, 181.0], text='□换先研究项目'), TableCell(bbox=[424.0, 201.0, 584.0, 228.0], text='四非系列化'), TableCell(bbox=[228.0, 212.0, 407.0, 236.0], text='△A. 新产品'), TableCell(bbox=[624.0, 212.0, 1361.0, 237.0], text='(□A1 全新产品 □A2 改进型新产品 ☑A3 扩展型新产'), TableCell(bbox=[622.0, 252.0, 1272.0, 281.0], text='品 □A4 降低成本型新产品 □A5 仿制型新产品)'), TableCell(bbox=[228.0, 255.0, 363.0, 278.0], text='、新材料'), TableCell(bbox=[428.0, 256.0, 583.0, 283.0], text='□系列型谱'), TableCell(bbox=[24.0, 304.0, 160.0, 331.0], text='项目类型'), TableCell(bbox=[227.0, 319.0, 1064.0, 345.0], text='cB. 工艺改进cC. 基础预研aD. 设备开发及工装夹具开发'), TableCell(bbox=[621.0, 373.0, 927.0, 396.0], text='□政府项目 归口单位:'), TableCell(bbox=[227.0, 414.0, 379.0, 440.0], text='國内部项目'), TableCell(bbox=[426.0, 414.0, 577.0, 440.0], text='ロ外部项目'), TableCell(bbox=[619.0, 415.0, 928.0, 440.0], text='□纵向项目 归口单位:'), TableCell(bbox=[621.0, 458.0, 928.0, 484.0], text='□横向项目 合作单位:'), TableCell(bbox=[20.0, 510.0, 164.0, 537.0], text='成果形式'), TableCell(bbox=[227.0, 515.0, 1247.0, 544.0], text='A. 样品( )B. 产品(1) C. 专利( )D. 论文( )E. 
其它( )'), TableCell(bbox=[21.0, 568.0, 166.0, 596.0], text='完成形式'), TableCell(bbox=[226.0, 568.0, 493.0, 596.0], text='产品通过设计定型')], rows=[TableRow(bbox=[22.91015625, 43.265625, 1101.03515625, 72.515625], row_id=0), TableRow(bbox=[21.5625, 104.203125, 1299.140625, 131.015625], row_id=1), TableRow(bbox=[58.623046875, 157.21875, 458.876953125, 330.28125], row_id=2), TableRow(bbox=[227.75390625, 203.2265625, 1357.08984375, 236.7421875], row_id=3), TableRow(bbox=[222.36328125, 252.28125, 1270.83984375, 281.53125], row_id=4), TableRow(bbox=[235.83984375, 315.9609375, 1052.51953125, 342.1640625], row_id=5), TableRow(bbox=[240.556640625, 369.890625, 918.427734375, 441.796875], row_id=6), TableRow(bbox=[603.75, 414.6796875, 927.1875, 439.6640625], row_id=7), TableRow(bbox=[619.921875, 457.3359375, 927.1875, 483.5390625], row_id=8), TableRow(bbox=[18.8671875, 507.9140625, 1245.234375, 540.2109375], row_id=9), TableRow(bbox=[20.21484375, 567.9375, 494.58984375, 595.96875], row_id=10)], cols=[TableCol(bbox=[20.21484375, 42.9609375, 165.76171875, 594.4453125], col_id=0), TableCol(bbox=[222.36328125, 42.9609375, 1257.36328125, 594.4453125], col_id=1), TableCol(bbox=[431.923828125, 106.640625, 586.904296875, 441.796875], col_id=2), TableCol(bbox=[991.201171875, 103.59375, 1011.416015625, 470.4375], col_id=3), TableCol(bbox=[1014.111328125, 111.515625, 1298.466796875, 138.328125], col_id=4)], image_bbox=[0.0, 0.0, 1380.0, 624.0])], table_imgs=[<PIL.Image.Image image mode=RGB size=1380x624 at 0x7FA541791FD0>])] -> Cannot close object, library is destroyed. This may cause a memory leak! -> Cannot close object, library is destroyed. This may cause a memory leak!
-> Cannot close object, library is destroyed. This may cause a memory leak!
How can I solve it?
The text was updated successfully, but these errors were encountered:
This is an issue with pypdfium2 - it needs to be the first import in your code. It's just a warning though.
Sorry, something went wrong.
No branches or pull requests
-> Cannot close object, library is destroyed. This may cause a memory leak!
How can I solve it?
The text was updated successfully, but these errors were encountered: