huggingface · vasqu · Mar 19, 2026 · Feb 5, 2026 · Feb 9, 2026 · Feb 24, 2026
diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
@@ -1272,6 +1272,8 @@
         title: PP-OCRv5_mobile_det
       - local: model_doc/pp_ocrv5_server_det
         title: PP-OCRv5_server_det
+      - local: model_doc/pp_chart2table
+        title: PPChart2Table
       - local: model_doc/pp_lcnet
         title: PPLCNet
       - local: model_doc/pp_lcnet_v3

diff --git a/docs/source/en/model_doc/pp_chart2table.md b/docs/source/en/model_doc/pp_chart2table.md
@@ -0,0 +1,174 @@
+<!--Copyright 2026 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+*This model was released on {release_date} and added to Hugging Face Transformers on 2026-03-16.*
+
+# PP-Chart2Table
+
+<div class="flex flex-wrap space-x-1">
+<img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
+</div>
+
+## Overview
+
+**PP-Chart2Table** is a state-of-the-art multimodal model developed by the PaddlePaddle team that specializes in parsing charts in both Chinese and English. Its high performance is driven by a novel "Shuffled Chart Data Retrieval" training task which, combined with a refined token masking strategy, significantly improves its efficiency in converting charts to data tables. The model is further strengthened by an advanced data synthesis pipeline that uses high-quality seed data, retrieval-augmented generation (RAG), and LLM persona design to create a richer, more diverse training set. To address the challenge of large-scale unlabeled, out-of-distribution (OOD) data, the team implemented a two-stage distillation process, ensuring robust adaptability and generalization on real-world data.
+
+## Model Architecture
+
+PP-Chart2Table adopts a multimodal fusion architecture that combines a vision tower for chart feature extraction with a language model for table structure generation, enabling end-to-end chart-to-table conversion.
+
+## Usage
+
+### Single input inference
+
+The example below demonstrates how to convert a chart image into a data table with PP-Chart2Table using [`Pipeline`] or the [`AutoModel`].
+
+<hfoptions id="usage">
+<hfoption id="Pipeline">
+
+```py
+import requests
+from PIL import Image
+from transformers import pipeline
+model_path = "PaddlePaddle/PP-Chart2Table_safetensors"
+pipe = pipeline(
+    task="image-text-to-text", 
+    model=model_path,
+    device_map="auto",
+)
+image = Image.open(requests.get("https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/chart_parsing_02.png", stream=True).raw)
+result = pipe(
+    images=image, 
+    text="", 
+    do_sample=False, 
+    max_new_tokens=256
+)
+print(result)
+
+```
+
+</hfoption>
+
+<hfoption id="AutoModel">
+
+```py
+import requests
+from PIL import Image
+from transformers import AutoModelForImageTextToText, AutoProcessor
+
+model_path = "PaddlePaddle/PP-Chart2Table_safetensors"
+model = AutoModelForImageTextToText.from_pretrained(
+    model_path, 
+    dtype="float32",
-    dtype="float32",
-    dtype="float32",
+    device_map="auto",
+)
+processor = AutoProcessor.from_pretrained(model_path, use_fast=True).to(model.device)
-processor = AutoProcessor.from_pretrained(model_path, use_fast=True).to(model.device)
+processor = AutoProcessor.from_pretrained(model_path).to(model.device)
-processor = AutoProcessor.from_pretrained(model_path, use_fast=True).to(model.device)
+processor = AutoProcessor.from_pretrained(model_path).to(model.device)
+
+image = Image.open(requests.get("https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/chart_parsing_02.png", stream=True).raw)
+inputs = processor(images=image)
+
+generated_ids = model.generate(**inputs, use_cache=True, do_sample=False, max_new_tokens=256)
-generated_ids = model.generate(**inputs, use_cache=True, do_sample=False, max_new_tokens=256)
+generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=256)
-generated_ids = model.generate(**inputs, use_cache=True, do_sample=False, max_new_tokens=256)
+generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=256)
+generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
+result = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)
+print(result)
+
+```
+
+</hfoption>
+</hfoptions>
+
+### Batched inference
+
+The example below demonstrates how to run batched inference on multiple chart images with PP-Chart2Table using [`Pipeline`] or the [`AutoModel`]:
+
+<hfoptions id="usage">
+<hfoption id="Pipeline">
+
+```py
+import requests
+from transformers import pipeline
+from PIL import Image
+model_path = "PaddlePaddle/PP-Chart2Table_safetensors"
+pipe = pipeline(
+    task="image-text-to-text", 
+    model=model_path,
+    device_map="auto",
+)
+image = Image.open(requests.get("https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/chart_parsing_02.png", stream=True).raw)
+result = pipe(
+    images=[image, image],
+    text="",
+    do_sample=False,
+    max_new_tokens=256
+)
+print(result)
+```
+
+</hfoption>
+
+<hfoption id="AutoModel">
+
+```py
+import requests
+from PIL import Image
+from transformers import AutoModelForImageTextToText, AutoProcessor
+
+model_path = "PaddlePaddle/PP-Chart2Table_safetensors"
+model = AutoModelForImageTextToText.from_pretrained(
+    model_path, 
+    dtype="float32",
-    dtype="float32",
-    dtype="float32",
+    device_map="auto",
+)
+processor = AutoProcessor.from_pretrained(model_path).to(model.device)
+
+image = Image.open(requests.get("https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/chart_parsing_02.png", stream=True).raw)
+inputs = processor(images=[image, image])
+
+generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=256)
+generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
+result = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)
+print(result)
+```
+
+</hfoption>
+</hfoptions>
+
+## PPChart2TableForConditionalGeneration
+
+[[autodoc]] PPChart2TableForConditionalGeneration
+
+## PPChart2TableModel
+
+[[autodoc]] PPChart2TableModel
+
+## PPChart2TableConfig
+
+[[autodoc]] PPChart2TableConfig
+
+## PPChart2TableVisionPreTrainedModel
+
+[[autodoc]] PPChart2TableVisionPreTrainedModel
+
+## PPChart2TablePreTrainedModel
+
+[[autodoc]] PPChart2TablePreTrainedModel
-## PPChart2TableVisionPreTrainedModel
-
-[[autodoc]] PPChart2TableVisionPreTrainedModel
-
-## PPChart2TablePreTrainedModel
-
-[[autodoc]] PPChart2TablePreTrainedModel
-## PPChart2TableVisionPreTrainedModel
-
-[[autodoc]] PPChart2TableVisionPreTrainedModel
-
-## PPChart2TablePreTrainedModel
-
-[[autodoc]] PPChart2TablePreTrainedModel
+
+## PPChart2TableImageProcessorFast
+
+[[autodoc]] PPChart2TableImageProcessorFast
+
+## PPChart2TableProcessor
+
+[[autodoc]] PPChart2TableProcessor
diff --git a/src/transformers/conversion_mapping.py b/src/transformers/conversion_mapping.py
@@ -450,6 +450,7 @@ def register_checkpoint_conversion_mapping(
     "sam3_tracker",
     "sam3_tracker_video",
     "paddleocrvl",
+    "ppchart2table",
 "got_ocr2": "llava", 
 "got_ocr2": "llava", 
     # NOTE: Slightly different from `model_type` (to follow naming conventions in vllm/sglang)
     "ernie4_5_vlmoe",
     "ernie4_5_vl_moe",  # BC alias

diff --git a/src/transformers/models/__init__.py b/src/transformers/models/__init__.py
@@ -317,6 +317,7 @@
     from .plbart import *
     from .poolformer import *
     from .pop2piano import *
+    from .pp_chart2table import *
     from .pp_doclayout_v2 import *
     from .pp_doclayout_v3 import *
     from .pp_lcnet import *

diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py
@@ -355,6 +355,7 @@
         ("plbart", "PLBartConfig"),
         ("poolformer", "PoolFormerConfig"),
         ("pop2piano", "Pop2PianoConfig"),
+        ("pp_chart2table", "PPChart2TableConfig"),
         ("pp_doclayout_v2", "PPDocLayoutV2Config"),
         ("pp_doclayout_v3", "PPDocLayoutV3Config"),
         ("pp_lcnet", "PPLCNetConfig"),
@@ -869,6 +870,7 @@
         ("plbart", "PLBart"),
         ("poolformer", "PoolFormer"),
         ("pop2piano", "Pop2Piano"),
+        ("pp_chart2table", "PPChart2Table"),
         ("pp_doclayout_v2", "PPDocLayoutV2"),
         ("pp_doclayout_v3", "PPDocLayoutV3"),
         ("pp_lcnet", "PPLCNet"),

diff --git a/src/transformers/models/auto/image_processing_auto.py b/src/transformers/models/auto/image_processing_auto.py
@@ -170,6 +170,7 @@
             ("pixio", ("BitImageProcessor", "BitImageProcessorFast")),
             ("pixtral", ("PixtralImageProcessor", "PixtralImageProcessorFast")),
             ("poolformer", ("PoolFormerImageProcessor", "PoolFormerImageProcessorFast")),
+            ("pp_chart2table", (None, "PPChart2TableImageProcessorFast")),
             ("pp_doclayout_v2", (None, "PPDocLayoutV2ImageProcessorFast")),
             ("pp_doclayout_v3", (None, "PPDocLayoutV3ImageProcessorFast")),
             ("pp_lcnet", (None, "PPLCNetImageProcessorFast")),

diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py
@@ -985,6 +985,7 @@ class _BaseModelWithGenerate(PreTrainedModel, GenerationMixin):
         ("perception_lm", "PerceptionLMForConditionalGeneration"),
         ("pix2struct", "Pix2StructForConditionalGeneration"),
         ("pixtral", "LlavaForConditionalGeneration"),
+        ("pp_chart2table", "PPChart2TableForConditionalGeneration"),
-        ("pp_chart2table", "PPChart2TableForConditionalGeneration"),
+        ("pp_chart2table", "GotOcr2ForConditionalGeneration"),
-        ("pp_chart2table", "PPChart2TableForConditionalGeneration"),
+        ("pp_chart2table", "GotOcr2ForConditionalGeneration"),
         ("qwen2_5_vl", "Qwen2_5_VLForConditionalGeneration"),
         ("qwen2_vl", "Qwen2VLForConditionalGeneration"),
         ("qwen3_5", "Qwen3_5ForConditionalGeneration"),

diff --git a/src/transformers/models/auto/processing_auto.py b/src/transformers/models/auto/processing_auto.py
@@ -136,6 +136,7 @@
             ("pix2struct", "Pix2StructProcessor"),
             ("pixtral", "PixtralProcessor"),
             ("pop2piano", "Pop2PianoProcessor"),
+            ("pp_chart2table", "PPChart2TableProcessor"),
             ("qwen2_5_omni", "Qwen2_5OmniProcessor"),
             ("qwen2_5_vl", "Qwen2_5_VLProcessor"),
             ("qwen2_audio", "Qwen2AudioProcessor"),

diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py
@@ -255,6 +255,7 @@
             else ("TokenizersBackend" if is_tokenizers_available() else None),
         ),
         ("plbart", "PLBartTokenizer" if is_tokenizers_available() else None),
+        ("pp_chart2table", "TokenizersBackend" if is_tokenizers_available() else None),
         ("prophetnet", "ProphetNetTokenizer"),
         ("qdqbert", "BertTokenizer" if is_tokenizers_available() else None),
         ("qwen2", "Qwen2Tokenizer" if is_tokenizers_available() else None),

diff --git a/src/transformers/models/pp_chart2table/__init__.py b/src/transformers/models/pp_chart2table/__init__.py
@@ -0,0 +1,30 @@
+# Copyright 2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import TYPE_CHECKING
+
+from ...utils import _LazyModule
+from ...utils.import_utils import define_import_structure
+
+
+if TYPE_CHECKING:
+    from .configuration_pp_chart2table import *
+    from .image_processing_pp_chart2table_fast import *
+    from .modeling_pp_chart2table import *
+    from .processing_pp_chart2table import *
+else:
+    import sys
+
+    _file = globals()["__file__"]
+    sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)