Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cannot close object, library is destroyed. This may cause a memory leak! #3

Closed
aceliuchanghong opened this issue Oct 16, 2024 · 1 comment

Comments

@aceliuchanghong
Copy link

def run_surya_table_detection(
    PDF_PATH,
    det_model,
    det_processor,
    layout_model,
    layout_processor,
    table_rec_model,
    table_rec_processor,
    rec_model,
    rec_processor,
):
    """Run table extraction on the PDF at ``PDF_PATH`` and log the elapsed time.

    The detection/layout models and processors are packed into one 4-tuple
    and the table-recognition/text-recognition models and processors into
    another; both tuples are handed to ``extract_tables`` together with the
    images, high-resolution images and text lines that ``load_pdfs_images``
    returns for ``PDF_PATH``.

    Returns:
        Whatever ``extract_tables`` returns, unchanged.
    """
    began = time.time()

    # The third value returned by load_pdfs_images is not needed here.
    images, highres_images, _names, text_lines = load_pdfs_images(PDF_PATH)
    tables = extract_tables(
        images,
        highres_images,
        text_lines,
        (det_model, det_processor, layout_model, layout_processor),
        (table_rec_model, table_rec_processor, rec_model, rec_processor),
    )

    logger.info(f"surya_table_detection耗时: {time.time() - began:.2f}秒")
    return tables



if __name__ == "__main__":
    # Stand-alone driver: load the surya models, run table extraction on a
    # single PDF, and log both the timings and the extracted result.
    PDF_PATH = "no_git_oic/页面提取自-NPD2317设计开发记录.pdf"

    # Checkpoint locations are read from the environment; os.getenv yields
    # None for any variable that is not set.
    det_model_path = os.getenv("SURYA_DET3_MODEL_PATH")
    rec_model_path = os.getenv("SURYA_REC2_MODEL_PATH")
    layout_model_path = os.getenv("SURYA_LAYOUT4_MODEL_PATH")
    order_model_path = os.getenv("SURYA_ORDER_MODEL_PATH")
    table_rec_model_path = os.getenv("SURYA_TABLEREC_MODEL_PATH")

    start_time = time.time()

    rec_processor = load_rec_processor()
    det_model = load_det_model(det_model_path)
    det_processor = load_det_processor(det_model_path)
    rec_model = load_rec_model(rec_model_path)

    # The layout model goes through the detection loaders, pointed at the
    # layout checkpoint instead of the detection one.
    layout_model = load_det_model(layout_model_path)
    layout_processor = load_det_processor(layout_model_path)

    # Bind each loader's result to the matching name: the processor loader
    # feeds order_processor and the model loader feeds order_model.
    # NOTE(review): neither object is passed to run_surya_table_detection
    # below -- confirm whether loading them is still wanted here.
    order_processor = load_order_processor(order_model_path)
    order_model = load_order_model(order_model_path)

    table_rec_model = load_table_rec_model(table_rec_model_path)
    table_rec_processor = load_table_rec_processor()

    end_time = time.time()
    elapsed_time = end_time - start_time
    logger.info(f"surya模型加载耗时: {elapsed_time:.2f}秒")

    table_detection = run_surya_table_detection(
        PDF_PATH,
        det_model,
        det_processor,
        layout_model,
        layout_processor,
        table_rec_model,
        table_rec_processor,
        rec_model,
        rec_processor,
    )
    logger.info(f"table_detection:\n{table_detection}")
(.venv) (base) root@v100gpu-0002:/mnt/data/llch/my_lm_log# python test/ocr/test_surya.py 
Loaded detection model /mnt/data/llch/surya/surya_det3 on device cuda with dtype torch.float16
Loaded recognition model /mnt/data/llch/surya/surya_rec2 on device cuda with dtype torch.float16
Loaded detection model /mnt/data/llch/surya/surya_layout4 on device cuda with dtype torch.float16
Loaded reading order model /mnt/data/llch/surya/surya_order on device cuda with dtype torch.float16
Loaded recognition model /mnt/data/llch/surya/surya_tablerec on device cuda with dtype torch.float16
INFO:__main__:surya模型加载耗时: 18.78秒
Detecting bboxes: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.33it/s]
Detecting bboxes: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.54it/s]
Detecting bboxes: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.48it/s]
Recognizing Text: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s]
Recognizing tables: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  6.09it/s]
INFO:__main__:surya_table_detection耗时: 2.47秒
INFO:__main__:table_detection:
[ExtractPageResult(cells=[[SpanTableCell(bbox=[24.0, 43.0, 163.0, 69.0], text='项目名称', row_ids=[0], col_ids=[0]), SpanTableCell(bbox=[230.0, 44.0, 1097.0, 73.0], text='(G)CT41G-2220-X7R-50V-22µF-K(N)电容器', row_ids=[0], col_ids=[1]), SpanTableCell(bbox=[23.0, 104.0, 161.0, 130.0], text='预计周期', row_ids=[1], col_ids=[0]), SpanTableCell(bbox=[441.0, 104.0, 730.0, 132.0], text='2023 年 02月 01日', row_ids=[1], col_ids=[1]), SpanTableCell(bbox=[229.0, 105.0, 362.0, 131.0], text='起始时间', row_ids=[1], col_ids=[1]), SpanTableCell(bbox=[804.0, 105.0, 937.0, 132.0], text='终止时间', row_ids=[1], col_ids=[1]), SpanTableCell(bbox=[1016.0, 105.0, 1300.0, 132.0], text='2023年12月 30日', row_ids=[1], col_ids=[4]), SpanTableCell(bbox=[228.0, 156.0, 441.0, 181.0], text='□换先研究项目', row_ids=[2], col_ids=[1]), SpanTableCell(bbox=[424.0, 201.0, 584.0, 228.0], text='四非系列化', row_ids=[3], col_ids=[2]), SpanTableCell(bbox=[228.0, 212.0, 407.0, 236.0], text='△A. 新产品', row_ids=[2], col_ids=[1]), SpanTableCell(bbox=[624.0, 212.0, 1361.0, 237.0], text='(□A1 全新产品 □A2 改进型新产品 ☑A3 扩展型新产', row_ids=[3], col_ids=[1]), SpanTableCell(bbox=[622.0, 252.0, 1272.0, 281.0], text='品 □A4 降低成本型新产品 □A5 仿制型新产品)', row_ids=[4], col_ids=[1]), SpanTableCell(bbox=[228.0, 255.0, 363.0, 278.0], text='、新材料', row_ids=[2], col_ids=[1]), SpanTableCell(bbox=[428.0, 256.0, 583.0, 283.0], text='□系列型谱', row_ids=[4], col_ids=[2]), SpanTableCell(bbox=[24.0, 304.0, 160.0, 331.0], text='项目类型', row_ids=[2], col_ids=[0]), SpanTableCell(bbox=[227.0, 319.0, 1064.0, 345.0], text='cB. 工艺改进cC. 基础预研aD. 
设备开发及工装夹具开发', row_ids=[5], col_ids=[1]), SpanTableCell(bbox=[621.0, 373.0, 927.0, 396.0], text='□政府项目 归口单位:', row_ids=[6], col_ids=[1]), SpanTableCell(bbox=[227.0, 414.0, 379.0, 440.0], text='國内部项目', row_ids=[6], col_ids=[1]), SpanTableCell(bbox=[426.0, 414.0, 577.0, 440.0], text='ロ外部项目', row_ids=[6], col_ids=[2]), SpanTableCell(bbox=[619.0, 415.0, 928.0, 440.0], text='□纵向项目 归口单位:', row_ids=[7], col_ids=[1]), SpanTableCell(bbox=[621.0, 458.0, 928.0, 484.0], text='□横向项目 合作单位:', row_ids=[8], col_ids=[1]), SpanTableCell(bbox=[20.0, 510.0, 164.0, 537.0], text='成果形式', row_ids=[9], col_ids=[0]), SpanTableCell(bbox=[227.0, 515.0, 1247.0, 544.0], text='A. 样品( )B. 产品(1) C. 专利( )D. 论文( )E. 其它( )', row_ids=[9], col_ids=[1]), SpanTableCell(bbox=[21.0, 568.0, 166.0, 596.0], text='完成形式', row_ids=[10], col_ids=[0]), SpanTableCell(bbox=[226.0, 568.0, 493.0, 596.0], text='产品通过设计定型', row_ids=[10], col_ids=[1])]], rows_cols=[TableResult(cells=[TableCell(bbox=[24.0, 43.0, 163.0, 69.0], text='项目名称'), TableCell(bbox=[230.0, 44.0, 1097.0, 73.0], text='(G)CT41G-2220-X7R-50V-22µF-K(N)片式多层瓷介固定电容器'), TableCell(bbox=[23.0, 104.0, 161.0, 130.0], text='预计周期'), TableCell(bbox=[441.0, 104.0, 730.0, 132.0], text='2023 年 02月 01日'), TableCell(bbox=[229.0, 105.0, 362.0, 131.0], text='起始时间'), TableCell(bbox=[804.0, 105.0, 937.0, 132.0], text='终止时间'), TableCell(bbox=[1016.0, 105.0, 1300.0, 132.0], text='2023年12月 30日'), TableCell(bbox=[228.0, 156.0, 441.0, 181.0], text='□换先研究项目'), TableCell(bbox=[424.0, 201.0, 584.0, 228.0], text='四非系列化'), TableCell(bbox=[228.0, 212.0, 407.0, 236.0], text='△A. 新产品'), TableCell(bbox=[624.0, 212.0, 1361.0, 237.0], text='(□A1 全新产品 □A2 改进型新产品 ☑A3 扩展型新产'), TableCell(bbox=[622.0, 252.0, 1272.0, 281.0], text='品 □A4 降低成本型新产品 □A5 仿制型新产品)'), TableCell(bbox=[228.0, 255.0, 363.0, 278.0], text='、新材料'), TableCell(bbox=[428.0, 256.0, 583.0, 283.0], text='□系列型谱'), TableCell(bbox=[24.0, 304.0, 160.0, 331.0], text='项目类型'), TableCell(bbox=[227.0, 319.0, 1064.0, 345.0], text='cB. 工艺改进cC. 
基础预研aD. 设备开发及工装夹具开发'), TableCell(bbox=[621.0, 373.0, 927.0, 396.0], text='□政府项目 归口单位:'), TableCell(bbox=[227.0, 414.0, 379.0, 440.0], text='國内部项目'), TableCell(bbox=[426.0, 414.0, 577.0, 440.0], text='ロ外部项目'), TableCell(bbox=[619.0, 415.0, 928.0, 440.0], text='□纵向项目 归口单位:'), TableCell(bbox=[621.0, 458.0, 928.0, 484.0], text='□横向项目 合作单位:'), TableCell(bbox=[20.0, 510.0, 164.0, 537.0], text='成果形式'), TableCell(bbox=[227.0, 515.0, 1247.0, 544.0], text='A. 样品( )B. 产品(1) C. 专利( )D. 论文( )E. 其它( )'), TableCell(bbox=[21.0, 568.0, 166.0, 596.0], text='完成形式'), TableCell(bbox=[226.0, 568.0, 493.0, 596.0], text='产品通过设计定型')], rows=[TableRow(bbox=[22.91015625, 43.265625, 1101.03515625, 72.515625], row_id=0), TableRow(bbox=[21.5625, 104.203125, 1299.140625, 131.015625], row_id=1), TableRow(bbox=[58.623046875, 157.21875, 458.876953125, 330.28125], row_id=2), TableRow(bbox=[227.75390625, 203.2265625, 1357.08984375, 236.7421875], row_id=3), TableRow(bbox=[222.36328125, 252.28125, 1270.83984375, 281.53125], row_id=4), TableRow(bbox=[235.83984375, 315.9609375, 1052.51953125, 342.1640625], row_id=5), TableRow(bbox=[240.556640625, 369.890625, 918.427734375, 441.796875], row_id=6), TableRow(bbox=[603.75, 414.6796875, 927.1875, 439.6640625], row_id=7), TableRow(bbox=[619.921875, 457.3359375, 927.1875, 483.5390625], row_id=8), TableRow(bbox=[18.8671875, 507.9140625, 1245.234375, 540.2109375], row_id=9), TableRow(bbox=[20.21484375, 567.9375, 494.58984375, 595.96875], row_id=10)], cols=[TableCol(bbox=[20.21484375, 42.9609375, 165.76171875, 594.4453125], col_id=0), TableCol(bbox=[222.36328125, 42.9609375, 1257.36328125, 594.4453125], col_id=1), TableCol(bbox=[431.923828125, 106.640625, 586.904296875, 441.796875], col_id=2), TableCol(bbox=[991.201171875, 103.59375, 1011.416015625, 470.4375], col_id=3), TableCol(bbox=[1014.111328125, 111.515625, 1298.466796875, 138.328125], col_id=4)], image_bbox=[0.0, 0.0, 1380.0, 624.0])], table_imgs=[<PIL.Image.Image image mode=RGB size=1380x624 at 
0x7FA541791FD0>])]
-> Cannot close object, library is destroyed. This may cause a memory leak!
-> Cannot close object, library is destroyed. This may cause a memory leak!

-> Cannot close object, library is destroyed. This may cause a memory leak!

How can I solve it?

@VikParuchuri
Copy link
Owner

This is an issue with pypdfium2 - it needs to be the first import in your code. It's just a warning though.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants