Skip to content

Commit

Permalink
Support tableformer model choice
Browse files (browse the repository at this point in the history)
Signed-off-by: Christoph Auer <[email protected]>
  • Loading branch information
cau-git committed Sep 19, 2024
1 parent 6dd1e91 commit 5b131d2
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 1 deletion.
6 changes: 6 additions & 0 deletions docling/datamodel/base_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ class DocInputType(str, Enum):
STREAM = auto()


class TableFormerMode(str, Enum):
    """Selectable TableFormer model variant used for table-structure recognition.

    Mixing in ``str`` makes every member a string instance as well as an
    enum member; the member values themselves are generated by ``auto()``.
    """

    # Used as the default for TableStructureOptions.mode.
    FAST = auto()
    # When selected, the table structure model loads its artifacts from a
    # "fat" subdirectory of the configured artifacts path. Presumably the
    # larger, more accurate checkpoint — confirm against the model artifacts.
    ACCURATE = auto()


class CoordOrigin(str, Enum):
TOPLEFT = auto()
BOTTOMLEFT = auto()
Expand Down Expand Up @@ -305,6 +310,7 @@ class TableStructureOptions(BaseModel):
# are merged across table columns.
# False: Let table structure model define the text cells, ignore PDF cells.
)
mode: TableFormerMode = TableFormerMode.FAST


class PipelineOptions(BaseModel):
Expand Down
10 changes: 9 additions & 1 deletion docling/models/table_structure_model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import copy
import os.path
from pathlib import Path
from typing import Iterable, List

import numpy
Expand All @@ -10,6 +12,7 @@
Page,
TableCell,
TableElement,
TableFormerMode,
TableStructurePrediction,
)

Expand All @@ -18,10 +21,15 @@ class TableStructureModel:
def __init__(self, config):
self.config = config
self.do_cell_matching = config["do_cell_matching"]
self.mode = config["mode"]

self.enabled = config["enabled"]
if self.enabled:
artifacts_path = config["artifacts_path"]
artifacts_path: Path = config["artifacts_path"]

if self.mode == TableFormerMode.ACCURATE:
artifacts_path = artifacts_path / "fat"

# Third Party
import docling_ibm_models.tableformer.common as c

Expand Down
1 change: 1 addition & 0 deletions docling/pipeline/standard_model_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def __init__(self, artifacts_path: Path, pipeline_options: PipelineOptions):
"artifacts_path": artifacts_path
/ StandardModelPipeline._table_model_path,
"enabled": pipeline_options.do_table_structure,
"mode": pipeline_options.table_structure_options.mode,
"do_cell_matching": pipeline_options.table_structure_options.do_cell_matching,
}
),
Expand Down

0 comments on commit 5b131d2

Please sign in to comment.