diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index 9d736cdbc537..2aebe0d7e74f 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -1284,6 +1284,8 @@
title: PP-OCRv5_server_det
- local: model_doc/pp_ocrv5_server_rec
title: PP-OCRv5_server_rec
+ - local: model_doc/pp_chart2table
+ title: PPChart2Table
- local: model_doc/pp_lcnet
title: PPLCNet
- local: model_doc/pp_lcnet_v3
diff --git a/docs/source/en/model_doc/pp_chart2table.md b/docs/source/en/model_doc/pp_chart2table.md
new file mode 100644
index 000000000000..b8b603035c33
--- /dev/null
+++ b/docs/source/en/model_doc/pp_chart2table.md
@@ -0,0 +1,204 @@
+
+*This model was released on 2025-05-20 and added to Hugging Face Transformers on 2026-03-18.*
+
+# PP-Chart2Table
+
+
+

+
+
+## Overview
+
+**PP-Chart2Table** is a state-of-the-art multimodal model developed by the PaddlePaddle team that specializes in parsing charts in both Chinese and English. Its high performance is driven by a novel "Shuffled Chart Data Retrieval" training task which, combined with a refined token masking strategy, significantly improves its efficiency in converting charts to data tables. The model is further strengthened by an advanced data synthesis pipeline that uses high-quality seed data, retrieval-augmented generation (RAG), and LLM persona design to create a richer, more diverse training set. To address the challenge of large-scale unlabeled, out-of-distribution (OOD) data, the team implemented a two-stage distillation process, ensuring robust adaptability and generalization on real-world data.
+
+## Model Architecture
+PP-Chart2Table adopts a multimodal fusion architecture that combines a vision tower for chart feature extraction and a language model for table structure generation, enabling end-to-end chart-to-table conversion.
+
+
+## Usage
+
+### Single input inference
+
+The example below demonstrates how to convert a chart image into a table with PP-Chart2Table using [`Pipeline`] or the [`AutoModel`] class.
+
+
+
+
+```py
+from transformers import pipeline
+
+pipe = pipeline("image-text-to-text", model="PaddlePaddle/PP-Chart2Table_safetensors")
+
+# PPChart2TableProcessor uses hardcoded "Chart to table" instruction internally via chat template
+conversation = [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image",
+ "url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/chart_parsing_02.png",
+ },
+ ],
+ },
+]
+result = pipe(text=conversation)
+print(result[0]["generated_text"])
+
+```
+
+
+
+
+
+```py
+import requests
+from PIL import Image
+from transformers import AutoModelForImageTextToText, AutoProcessor
+
+model_path = "PaddlePaddle/PP-Chart2Table_safetensors"
+model = AutoModelForImageTextToText.from_pretrained(
+ model_path,
+ device_map="auto",
+)
+processor = AutoProcessor.from_pretrained(model_path)
+
+# PPChart2TableProcessor uses hardcoded "Chart to table" instruction internally via chat template
+conversation = [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image",
+ "url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/chart_parsing_02.png",
+ },
+ ],
+ },
+]
+
+inputs = processor.apply_chat_template(
+ conversation,
+ tokenize=True,
+ add_generation_prompt=True,
+ truncation=True,
+ return_dict=True,
+ return_tensors="pt",
+).to(model.device)
+
+generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=256)
+generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
+result = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)
+print(result)
+
+```
+
+
+
+
+### Batched inference
+
+The example below demonstrates how to run batched chart-to-table inference with PP-Chart2Table using [`Pipeline`] or the [`AutoModel`] class.
+
+
+
+
+```py
+from transformers import pipeline
+
+pipe = pipeline("image-text-to-text", model="PaddlePaddle/PP-Chart2Table_safetensors")
+
+# PPChart2TableProcessor uses hardcoded "Chart to table" instruction internally via chat template
+conversation = [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image",
+ "url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/chart_parsing_02.png",
+ },
+ ],
+ },
+]
+result = pipe(text=[conversation, conversation])
+print(result[0][0]["generated_text"])
+
+```
+
+
+
+
+
+```py
+import requests
+from PIL import Image
+from transformers import AutoModelForImageTextToText, AutoProcessor
+
+model_path = "PaddlePaddle/PP-Chart2Table_safetensors"
+model = AutoModelForImageTextToText.from_pretrained(
+ model_path,
+ device_map="auto",
+)
+processor = AutoProcessor.from_pretrained(model_path)
+
+# PPChart2TableProcessor uses hardcoded "Chart to table" instruction internally via chat template
+conversation = [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image",
+ "url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/chart_parsing_02.png",
+ },
+ ],
+ },
+]
+
+batch_conversation = [conversation, conversation]
+inputs = processor.apply_chat_template(
+ batch_conversation,
+ tokenize=True,
+ add_generation_prompt=True,
+ truncation=True,
+ return_dict=True,
+ return_tensors="pt",
+).to(model.device)
+
+generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=256)
+generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
+result = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)
+print(result)
+
+```
+
+
+
+
+
+## PPChart2TableConfig
+
+[[autodoc]] PPChart2TableConfig
+
+## PPChart2TableImageProcessor
+
+[[autodoc]] PPChart2TableImageProcessor
+
+## PPChart2TableImageProcessorPil
+
+[[autodoc]] PPChart2TableImageProcessorPil
+
+## PPChart2TableProcessor
+
+[[autodoc]] PPChart2TableProcessor
diff --git a/src/transformers/conversion_mapping.py b/src/transformers/conversion_mapping.py
index 820443e51832..ff1fd026dd6b 100755
--- a/src/transformers/conversion_mapping.py
+++ b/src/transformers/conversion_mapping.py
@@ -80,6 +80,7 @@
"mllama": "llava",
"qwen2_5_vl": "qwen2_vl",
"sam3_tracker_video": "sam3_tracker",
+ "pp_chart2table": "got_ocr2",
}
diff --git a/src/transformers/models/__init__.py b/src/transformers/models/__init__.py
index 860a1bac23cf..2995e6125111 100644
--- a/src/transformers/models/__init__.py
+++ b/src/transformers/models/__init__.py
@@ -321,6 +321,7 @@
from .plbart import *
from .poolformer import *
from .pop2piano import *
+ from .pp_chart2table import *
from .pp_doclayout_v2 import *
from .pp_doclayout_v3 import *
from .pp_lcnet import *
diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py
index e69c03978ebf..6e6e9d8c9333 100644
--- a/src/transformers/models/auto/configuration_auto.py
+++ b/src/transformers/models/auto/configuration_auto.py
@@ -359,6 +359,7 @@
("plbart", "PLBartConfig"),
("poolformer", "PoolFormerConfig"),
("pop2piano", "Pop2PianoConfig"),
+ ("pp_chart2table", "PPChart2TableConfig"),
("pp_doclayout_v2", "PPDocLayoutV2Config"),
("pp_doclayout_v3", "PPDocLayoutV3Config"),
("pp_lcnet", "PPLCNetConfig"),
@@ -879,6 +880,7 @@
("plbart", "PLBart"),
("poolformer", "PoolFormer"),
("pop2piano", "Pop2Piano"),
+ ("pp_chart2table", "PPChart2Table"),
("pp_doclayout_v2", "PPDocLayoutV2"),
("pp_doclayout_v3", "PPDocLayoutV3"),
("pp_lcnet", "PPLCNet"),
diff --git a/src/transformers/models/auto/image_processing_auto.py b/src/transformers/models/auto/image_processing_auto.py
index 0060eff0007c..520c1fb423b3 100644
--- a/src/transformers/models/auto/image_processing_auto.py
+++ b/src/transformers/models/auto/image_processing_auto.py
@@ -194,6 +194,10 @@
("pixio", {"torchvision": "BitImageProcessor", "pil": "BitImageProcessorPil"}),
("pixtral", {"torchvision": "PixtralImageProcessor", "pil": "PixtralImageProcessorPil"}),
("poolformer", {"torchvision": "PoolFormerImageProcessor", "pil": "PoolFormerImageProcessorPil"}),
+ (
+ "pp_chart2table",
+ {"torchvision": "PPChart2TableImageProcessor", "pil": "PPChart2TableImageProcessorPil"},
+ ),
("pp_doclayout_v2", {"torchvision": "PPDocLayoutV2ImageProcessor"}),
("pp_doclayout_v3", {"torchvision": "PPDocLayoutV3ImageProcessor"}),
("pp_lcnet", {"torchvision": "PPLCNetImageProcessor"}),
diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py
index 2d74698f235e..cc93964950e7 100644
--- a/src/transformers/models/auto/modeling_auto.py
+++ b/src/transformers/models/auto/modeling_auto.py
@@ -995,6 +995,7 @@ class _BaseModelWithGenerate(PreTrainedModel, GenerationMixin):
("pi0", "PI0ForConditionalGeneration"),
("pix2struct", "Pix2StructForConditionalGeneration"),
("pixtral", "LlavaForConditionalGeneration"),
+ ("pp_chart2table", "GotOcr2ForConditionalGeneration"),
("qwen2_5_vl", "Qwen2_5_VLForConditionalGeneration"),
("qwen2_vl", "Qwen2VLForConditionalGeneration"),
("qwen3_5", "Qwen3_5ForConditionalGeneration"),
diff --git a/src/transformers/models/auto/processing_auto.py b/src/transformers/models/auto/processing_auto.py
index c75911a2f557..a38c8f1a571f 100644
--- a/src/transformers/models/auto/processing_auto.py
+++ b/src/transformers/models/auto/processing_auto.py
@@ -137,6 +137,7 @@
("pix2struct", "Pix2StructProcessor"),
("pixtral", "PixtralProcessor"),
("pop2piano", "Pop2PianoProcessor"),
+ ("pp_chart2table", "PPChart2TableProcessor"),
("qwen2_5_omni", "Qwen2_5OmniProcessor"),
("qwen2_5_vl", "Qwen2_5_VLProcessor"),
("qwen2_audio", "Qwen2AudioProcessor"),
diff --git a/src/transformers/models/pp_chart2table/__init__.py b/src/transformers/models/pp_chart2table/__init__.py
new file mode 100644
index 000000000000..961039282748
--- /dev/null
+++ b/src/transformers/models/pp_chart2table/__init__.py
@@ -0,0 +1,30 @@
+# Copyright 2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import TYPE_CHECKING
+
+from ...utils import _LazyModule
+from ...utils.import_utils import define_import_structure
+
+
+if TYPE_CHECKING:
+ from .configuration_pp_chart2table import *
+ from .image_processing_pil_pp_chart2table import *
+ from .image_processing_pp_chart2table import *
+ from .processing_pp_chart2table import *
+else:
+ import sys
+
+ _file = globals()["__file__"]
+ sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)
diff --git a/src/transformers/models/pp_chart2table/configuration_pp_chart2table.py b/src/transformers/models/pp_chart2table/configuration_pp_chart2table.py
new file mode 100644
index 000000000000..3e85e2c96667
--- /dev/null
+++ b/src/transformers/models/pp_chart2table/configuration_pp_chart2table.py
@@ -0,0 +1,131 @@
+# ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ
+# This file was automatically generated from src/transformers/models/pp_chart2table/modular_pp_chart2table.py.
+# Do NOT edit this file manually as any edits will be overwritten by the generation of
+# the file from the modular. If any change should be done, please apply the change to the
+# modular_pp_chart2table.py file directly. One of our CI enforces this.
+# ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ
+# Copyright 2026 The PaddlePaddle Team and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from huggingface_hub.dataclasses import strict
+
+from ...configuration_utils import PreTrainedConfig
+from ...utils import auto_docstring
+from ..auto import CONFIG_MAPPING, AutoConfig
+
+
+# Configuration of the vision encoder sub-model. `PPChart2TableConfig` declares this class as the type of
+# its `vision_config` sub-config, which matches `base_config_key` below.
+# NOTE(review): the docstring checkpoint is a SAM checkpoint rather than a PP-Chart2Table one —
+# presumably carried over from the vision config this file was generated from; confirm it is intended.
+@auto_docstring(checkpoint="facebook/sam-vit-huge")
+@strict(accept_kwargs=True)
+class PPChart2TableVisionConfig(PreTrainedConfig):
+    r"""
+    output_channels (`int`, *optional*, defaults to 256):
+        Dimensionality of the output channels in the Patch Encoder.
+    window_size (`int`, *optional*, defaults to 14):
+        Window size for relative position.
+    use_abs_pos (`bool`, *optional*, defaults to `True`):
+        Whether to use absolute position embedding.
+    use_rel_pos (`bool`, *optional*, defaults to `True`):
+        Whether to use relative position embedding.
+    global_attn_indexes (`list[int]`, *optional*, defaults to `[2, 5, 8, 11]`):
+        The indexes of the global attention layers.
+    mlp_dim (`int`, *optional*, defaults to 3072):
+        The dimensionality of the MLP layer in the Transformer encoder.
+    """
+
+    # Key under which this config is nested inside the composite configuration.
+    base_config_key = "vision_config"
+    hidden_size: int = 768
+    output_channels: int = 256
+    num_hidden_layers: int = 12
+    num_attention_heads: int = 12
+    num_channels: int = 3
+    # Both sizes accept either a single int or an explicit pair.
+    image_size: int | list[int] | tuple[int, int] = 1024
+    patch_size: int | list[int] | tuple[int, int] = 16
+    hidden_act: str = "gelu"
+    layer_norm_eps: float = 1e-06
+    attention_dropout: float | int = 0.0
+    # NOTE(review): unusually small initializer range (1e-10) — confirm it is intentional.
+    initializer_range: float = 1e-10
+    qkv_bias: bool = True
+    use_abs_pos: bool = True
+    use_rel_pos: bool = True
+    window_size: int = 14
+    # Stored as a tuple by default, although the docstring shows the value as a list.
+    global_attn_indexes: list[int] | tuple[int, ...] = (2, 5, 8, 11)
+    mlp_dim: int = 3072
+
+
+# Composite configuration made of a vision sub-config and a text sub-config.
+@auto_docstring(checkpoint="PaddlePaddle/PP-Chart2Table_safetensors")
+@strict(accept_kwargs=True)
+class PPChart2TableConfig(PreTrainedConfig):
+    r"""
+    Example:
+
+    ```python
+    >>> from transformers import GotOcr2ForConditionalGeneration, PPChart2TableConfig
+
+    >>> # Initializing a PPChart2Table style configuration
+    >>> configuration = PPChart2TableConfig()
+
+    >>> # Initializing a model from the PaddlePaddle/PP-Chart2Table_safetensors style configuration
+    >>> model = GotOcr2ForConditionalGeneration(configuration) # underlying architecture is Got Ocr 2
+
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
+
+    model_type = "pp_chart2table"
+    # Maps the attribute name `image_token_id` onto the `image_token_index` field declared below.
+    attribute_map = {
+        "image_token_id": "image_token_index",
+    }
+    # The text sub-config type is resolved through `AutoConfig`; the vision one is `PPChart2TableVisionConfig`.
+    sub_configs = {"text_config": AutoConfig, "vision_config": PPChart2TableVisionConfig}
+
+    vision_config: dict | PreTrainedConfig | None = None
+    text_config: dict | PreTrainedConfig | None = None
+    image_token_index: int = 151859
+    image_seq_length: int = 576
+    tie_word_embeddings: bool = True
+
+    def __post_init__(self, **kwargs):
+        # Normalize `vision_config`: build the default vision config when missing, or instantiate it
+        # from a plain dict. An already-built config object is kept as is.
+        if self.vision_config is None:
+            self.vision_config = PPChart2TableVisionConfig()
+        elif isinstance(self.vision_config, dict):
+            self.vision_config = PPChart2TableVisionConfig(**self.vision_config)
+
+        # Normalize `text_config`: a dict without `model_type` is treated as a "qwen2" config, and the
+        # matching config class is looked up in `CONFIG_MAPPING`.
+        if isinstance(self.text_config, dict):
+            self.text_config["model_type"] = self.text_config.get("model_type", "qwen2")
+            self.text_config = CONFIG_MAPPING[self.text_config["model_type"]](**self.text_config)
+        elif self.text_config is None:
+            # No text config given: fall back to a "qwen2" config built from the hard-coded values below.
+            # `tie_word_embeddings` is propagated from this composite config.
+            self.text_config = CONFIG_MAPPING["qwen2"](
+                vocab_size=151860,
+                hidden_size=1024,
+                intermediate_size=2816,
+                num_hidden_layers=24,
+                num_attention_heads=16,
+                num_key_value_heads=16,
+                hidden_act="silu",
+                max_position_embeddings=32768,
+                initializer_range=0.02,
+                rms_norm_eps=1e-6,
+                use_cache=True,
+                tie_word_embeddings=self.tie_word_embeddings,
+                rope_theta=1000000.0,
+                rope_parameters=None,
+                use_sliding_window=False,
+                sliding_window=4096,
+                max_window_layers=21,
+                attention_dropout=0.0,
+            )
+
+        # Run the base-class post-init only after both sub-configs have been materialized.
+        super().__post_init__(**kwargs)
+
+
+__all__ = ["PPChart2TableConfig"]
diff --git a/src/transformers/models/pp_chart2table/image_processing_pil_pp_chart2table.py b/src/transformers/models/pp_chart2table/image_processing_pil_pp_chart2table.py
new file mode 100644
index 000000000000..40cce468b5dc
--- /dev/null
+++ b/src/transformers/models/pp_chart2table/image_processing_pil_pp_chart2table.py
@@ -0,0 +1,36 @@
+# ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ
+# This file was automatically generated from src/transformers/models/pp_chart2table/modular_pp_chart2table.py.
+# Do NOT edit this file manually as any edits will be overwritten by the generation of
+# the file from the modular. If any change should be done, please apply the change to the
+# modular_pp_chart2table.py file directly. One of our CI enforces this.
+# ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ
+# Copyright 2026 The PaddlePaddle Team and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ...image_processing_backends import PilBackend
+from ...utils import auto_docstring
+
+
+# `PilBackend`-based image processor; the class body only sets class-level preprocessing attributes.
+@auto_docstring
+class PPChart2TableImageProcessorPil(PilBackend):
+    # NOTE(review): 3 is presumably the integer code for bicubic resampling — confirm.
+    resample = 3
+    # NOTE(review): these look like the OpenAI CLIP normalization statistics — confirm.
+    image_mean = [0.48145466, 0.4578275, 0.40821073]
+    image_std = [0.26862954, 0.26130258, 0.27577711]
+    # Target size used when resizing.
+    size = {"height": 1024, "width": 1024}
+    # Resizing, rescaling and normalization are all switched on.
+    do_resize = True
+    do_rescale = True
+    do_normalize = True
+
+
+__all__ = ["PPChart2TableImageProcessorPil"]
diff --git a/src/transformers/models/pp_chart2table/image_processing_pp_chart2table.py b/src/transformers/models/pp_chart2table/image_processing_pp_chart2table.py
new file mode 100644
index 000000000000..b38027aecef9
--- /dev/null
+++ b/src/transformers/models/pp_chart2table/image_processing_pp_chart2table.py
@@ -0,0 +1,36 @@
+# ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ
+# This file was automatically generated from src/transformers/models/pp_chart2table/modular_pp_chart2table.py.
+# Do NOT edit this file manually as any edits will be overwritten by the generation of
+# the file from the modular. If any change should be done, please apply the change to the
+# modular_pp_chart2table.py file directly. One of our CI enforces this.
+# ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ
+# Copyright 2026 The PaddlePaddle Team and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ...image_processing_backends import TorchvisionBackend
+from ...utils import auto_docstring
+
+
+# `TorchvisionBackend`-based image processor; the class body only sets class-level preprocessing attributes.
+@auto_docstring
+class PPChart2TableImageProcessor(TorchvisionBackend):
+    # NOTE(review): 3 is presumably the integer code for bicubic resampling — confirm.
+    resample = 3
+    # NOTE(review): these look like the OpenAI CLIP normalization statistics — confirm.
+    image_mean = [0.48145466, 0.4578275, 0.40821073]
+    image_std = [0.26862954, 0.26130258, 0.27577711]
+    # Target size used when resizing.
+    size = {"height": 1024, "width": 1024}
+    # Resizing, rescaling and normalization are all switched on.
+    do_resize = True
+    do_rescale = True
+    do_normalize = True
+
+
+__all__ = ["PPChart2TableImageProcessor"]
diff --git a/src/transformers/models/pp_chart2table/modular_pp_chart2table.py b/src/transformers/models/pp_chart2table/modular_pp_chart2table.py
new file mode 100644
index 000000000000..709c465d5738
--- /dev/null
+++ b/src/transformers/models/pp_chart2table/modular_pp_chart2table.py
@@ -0,0 +1,94 @@
+# Copyright 2026 The PaddlePaddle Team and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from huggingface_hub.dataclasses import strict
+
+from ...feature_extraction_utils import BatchFeature
+from ...image_processing_backends import PilBackend, TorchvisionBackend
+from ...image_utils import ImageInput
+from ...processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
+from ...tokenization_utils_base import PreTokenizedInput, TextInput
+from ...utils import auto_docstring, logging
+from ..got_ocr2.configuration_got_ocr2 import GotOcr2Config
+
+
+logger = logging.get_logger(__name__)
+
+
+@auto_docstring(checkpoint="PaddlePaddle/PP-Chart2Table_safetensors")
+@strict(accept_kwargs=True)
+class PPChart2TableConfig(GotOcr2Config):
+ model_type = "pp_chart2table"
+
+ r"""
+ Example:
+
+ ```python
+ >>> from transformers import GotOcr2ForConditionalGeneration, PPChart2TableConfig
+
+ >>> # Initializing a PPChart2Table style configuration
+ >>> configuration = PPChart2TableConfig()
+
+ >>> # Initializing a model from the PaddlePaddle/PP-Chart2Table_safetensors style configuration
+ >>> model = GotOcr2ForConditionalGeneration(configuration) # underlying architecture is Got Ocr 2
+
+ >>> # Accessing the model configuration
+ >>> configuration = model.config
+ ```"""
+
+
+# `TorchvisionBackend`-based image processor; the class body only sets class-level preprocessing attributes.
+@auto_docstring
+class PPChart2TableImageProcessor(TorchvisionBackend):
+    # NOTE(review): 3 is presumably the integer code for bicubic resampling — confirm.
+    resample = 3
+    # NOTE(review): these look like the OpenAI CLIP normalization statistics — confirm.
+    image_mean = [0.48145466, 0.4578275, 0.40821073]
+    image_std = [0.26862954, 0.26130258, 0.27577711]
+    # Target size used when resizing.
+    size = {"height": 1024, "width": 1024}
+    # Resizing, rescaling and normalization are all switched on.
+    do_resize = True
+    do_rescale = True
+    do_normalize = True
+
+
+# `PilBackend`-based image processor; the class body only sets class-level preprocessing attributes.
+@auto_docstring
+class PPChart2TableImageProcessorPil(PilBackend):
+    # NOTE(review): 3 is presumably the integer code for bicubic resampling — confirm.
+    resample = 3
+    # NOTE(review): these look like the OpenAI CLIP normalization statistics — confirm.
+    image_mean = [0.48145466, 0.4578275, 0.40821073]
+    image_std = [0.26862954, 0.26130258, 0.27577711]
+    # Target size used when resizing.
+    size = {"height": 1024, "width": 1024}
+    # Resizing, rescaling and normalization are all switched on.
+    do_resize = True
+    do_rescale = True
+    do_normalize = True
+
+
+# Processor that pairs an image processor with a tokenizer (plus an optional chat template).
+@auto_docstring
+class PPChart2TableProcessor(ProcessorMixin):
+    def __init__(self, image_processor=None, tokenizer=None, chat_template=None, **kwargs):
+        # NOTE(review): extra `**kwargs` are accepted but not forwarded to `ProcessorMixin.__init__`;
+        # presumably this tolerates unknown keys coming from saved processor configs — confirm.
+        super().__init__(image_processor, tokenizer, chat_template=chat_template)
+
+    def __call__(
+        self,
+        images: ImageInput | None = None,
+        text: TextInput | PreTokenizedInput | list[TextInput] | list[PreTokenizedInput] | None = None,
+        **kwargs: Unpack[ProcessingKwargs],
+    ) -> BatchFeature:
+        """
+        Check that both modalities are present, then delegate to `ProcessorMixin.__call__`.
+
+        Args:
+            images: Image input(s) to process. Required despite the `None` default.
+            text: Text input(s) to process. Required despite the `None` default.
+            **kwargs: Processing keyword arguments forwarded unchanged to the parent call.
+
+        Returns:
+            `BatchFeature`: The result of `ProcessorMixin.__call__` for the given inputs.
+
+        Raises:
+            ValueError: If `images` or `text` is `None`.
+        """
+        # Image-only and text-only calls are rejected up front.
+        if text is None or images is None:
+            raise ValueError("Both `images` and `text` must be provided")
+        return super().__call__(images=images, text=text, **kwargs)
+
+
+__all__ = [
+ "PPChart2TableConfig",
+ "PPChart2TableImageProcessor",
+ "PPChart2TableImageProcessorPil",
+ "PPChart2TableProcessor",
+]
diff --git a/src/transformers/models/pp_chart2table/processing_pp_chart2table.py b/src/transformers/models/pp_chart2table/processing_pp_chart2table.py
new file mode 100644
index 000000000000..6f5e4554b731
--- /dev/null
+++ b/src/transformers/models/pp_chart2table/processing_pp_chart2table.py
@@ -0,0 +1,45 @@
+# ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ
+# This file was automatically generated from src/transformers/models/pp_chart2table/modular_pp_chart2table.py.
+# Do NOT edit this file manually as any edits will be overwritten by the generation of
+# the file from the modular. If any change should be done, please apply the change to the
+# modular_pp_chart2table.py file directly. One of our CI enforces this.
+# ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ๐จ
+# Copyright 2026 The PaddlePaddle Team and The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from ...feature_extraction_utils import BatchFeature
+from ...image_utils import ImageInput
+from ...processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
+from ...tokenization_utils_base import PreTokenizedInput, TextInput
+from ...utils import auto_docstring
+
+
+# Processor that pairs an image processor with a tokenizer (plus an optional chat template).
+@auto_docstring
+class PPChart2TableProcessor(ProcessorMixin):
+    def __init__(self, image_processor=None, tokenizer=None, chat_template=None, **kwargs):
+        # NOTE(review): extra `**kwargs` are accepted but not forwarded to `ProcessorMixin.__init__`;
+        # presumably this tolerates unknown keys coming from saved processor configs — confirm.
+        super().__init__(image_processor, tokenizer, chat_template=chat_template)
+
+    def __call__(
+        self,
+        images: ImageInput | None = None,
+        text: TextInput | PreTokenizedInput | list[TextInput] | list[PreTokenizedInput] | None = None,
+        **kwargs: Unpack[ProcessingKwargs],
+    ) -> BatchFeature:
+        """
+        Check that both modalities are present, then delegate to `ProcessorMixin.__call__`.
+
+        Args:
+            images: Image input(s) to process. Required despite the `None` default.
+            text: Text input(s) to process. Required despite the `None` default.
+            **kwargs: Processing keyword arguments forwarded unchanged to the parent call.
+
+        Returns:
+            `BatchFeature`: The result of `ProcessorMixin.__call__` for the given inputs.
+
+        Raises:
+            ValueError: If `images` or `text` is `None`.
+        """
+        # Image-only and text-only calls are rejected up front.
+        if text is None or images is None:
+            raise ValueError("Both `images` and `text` must be provided")
+        return super().__call__(images=images, text=text, **kwargs)
+
+
+__all__ = ["PPChart2TableProcessor"]
diff --git a/tests/models/pp_chart2table/__init__.py b/tests/models/pp_chart2table/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/models/pp_chart2table/test_image_processing_pp_chart2table.py b/tests/models/pp_chart2table/test_image_processing_pp_chart2table.py
new file mode 100644
index 000000000000..cea024d942e8
--- /dev/null
+++ b/tests/models/pp_chart2table/test_image_processing_pp_chart2table.py
@@ -0,0 +1,94 @@
+# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import unittest
+
+from transformers.testing_utils import require_torch, require_vision
+
+from ...test_image_processing_common import ImageProcessingTestMixin, prepare_image_inputs
+
+
+class PPChart2TableImageProcessingTester(unittest.TestCase):
+ def __init__(
+ self,
+ parent,
+ batch_size=7,
+ num_channels=3,
+ image_size=18,
+ min_resolution=30,
+ max_resolution=400,
+ do_resize=True,
+ size=None,
+ do_normalize=True,
+ image_mean=[0.48145466, 0.4578275, 0.40821073],
+ image_std=[0.26862954, 0.26130258, 0.27577711],
+ ):
+ super().__init__()
+ size = size if size is not None else {"height": 1024, "width": 1024}
+ self.parent = parent
+ self.batch_size = batch_size
+ self.num_channels = num_channels
+ self.image_size = image_size
+ self.min_resolution = min_resolution
+ self.max_resolution = max_resolution
+ self.do_resize = do_resize
+ self.size = size
+ self.do_normalize = do_normalize
+ self.image_mean = image_mean
+ self.image_std = image_std
+
+ def prepare_image_processor_dict(self):
+ return {
+ "do_resize": self.do_resize,
+ "size": self.size,
+ "do_normalize": self.do_normalize,
+ "image_mean": self.image_mean,
+ "image_std": self.image_std,
+ }
+
+ def expected_output_image_shape(self, images):
+ return self.num_channels, self.size["height"], self.size["width"]
+
+ def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=False):
+ return prepare_image_inputs(
+ batch_size=self.batch_size,
+ num_channels=self.num_channels,
+ min_resolution=self.min_resolution,
+ max_resolution=self.max_resolution,
+ equal_resolution=equal_resolution,
+ numpify=numpify,
+ torchify=torchify,
+ )
+
+
+@require_torch
+@require_vision
+class PPChart2TableImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
+ def setUp(self):
+ super().setUp()
+ self.image_processor_tester = PPChart2TableImageProcessingTester(self)
+
+ @property
+ def image_processor_dict(self):
+ return self.image_processor_tester.prepare_image_processor_dict()
+
+ def test_image_processor_properties(self):
+ for image_processing_class in self.image_processing_classes.values():
+ image_processor = image_processing_class(**self.image_processor_dict)
+ self.assertTrue(hasattr(image_processor, "do_resize"))
+ self.assertTrue(hasattr(image_processor, "size"))
+ self.assertTrue(hasattr(image_processor, "do_normalize"))
+ self.assertTrue(hasattr(image_processor, "image_mean"))
+ self.assertTrue(hasattr(image_processor, "image_std"))
diff --git a/tests/models/pp_chart2table/test_modeling_pp_chart2table.py b/tests/models/pp_chart2table/test_modeling_pp_chart2table.py
new file mode 100644
index 000000000000..b573723c4d13
--- /dev/null
+++ b/tests/models/pp_chart2table/test_modeling_pp_chart2table.py
@@ -0,0 +1,81 @@
+# Copyright 2026 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Testing suite for the PPChart2Table model."""
+
+import unittest
+
+from transformers import AutoModelForImageTextToText, AutoProcessor
+from transformers.testing_utils import cleanup, require_torch, require_vision, slow, torch_device
+
+
+@slow
+@require_vision
+@require_torch
+class PPChart2TableIntegrationTest(unittest.TestCase):
+ def setUp(self):
+ model_path = "PaddlePaddle/PP-Chart2Table_safetensors"
+ self.model = AutoModelForImageTextToText.from_pretrained(model_path).to(torch_device)
+ self.processor = AutoProcessor.from_pretrained(model_path)
+ self.conversation = [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image",
+ "url": "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/chart_parsing_02.png",
+ },
+ ],
+ },
+ ]
+
+ def tearDown(self):
+ cleanup(torch_device, gc_collect=True)
+
+ def test_small_model_integration_test_pp_chart2table(self):
+ inputs = self.processor.apply_chat_template(
+ self.conversation,
+ tokenize=True,
+ add_generation_prompt=True,
+ truncation=True,
+ return_dict=True,
+ return_tensors="pt",
+ ).to(self.model.device)
+
+ generated_ids = self.model.generate(**inputs, do_sample=False, max_new_tokens=32)
+ generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
+ decoded_output = self.processor.batch_decode(
+ generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+ )
+
+ expected_output = ["ๅนดไปฝ | ๅๅฎถไบๆ็บงๆ
ๆธธ้ฅญๅบๅนดๅนณๅ่ฅๆถ (็พไธๅ
) | ๅๅฎถไบๆ็บงๆ
ๆธธ้ฅญๅบๅนดๅนณๅๅฉๆถฆ (็พไธๅ
)\n"]
+ self.assertEqual(decoded_output, expected_output)
+
+ def test_small_model_integration_test_pp_chart2table_batched(self):
+ inputs = self.processor.apply_chat_template(
+ [self.conversation, self.conversation],
+ tokenize=True,
+ add_generation_prompt=True,
+ truncation=True,
+ return_dict=True,
+ return_tensors="pt",
+ ).to(self.model.device)
+
+ generated_ids = self.model.generate(**inputs, do_sample=False, max_new_tokens=6)
+ generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
+ decoded_output = self.processor.batch_decode(
+ generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+ )
+
+ expected_output = ["ๅนดไปฝ | ๅๅฎถ", "ๅนดไปฝ | ๅๅฎถ"]
+ self.assertEqual(decoded_output, expected_output)
diff --git a/tests/models/pp_chart2table/test_processing_pp_chart2table.py b/tests/models/pp_chart2table/test_processing_pp_chart2table.py
new file mode 100644
index 000000000000..2fec6e4313f1
--- /dev/null
+++ b/tests/models/pp_chart2table/test_processing_pp_chart2table.py
@@ -0,0 +1,84 @@
+# Copyright 2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from transformers import PPChart2TableProcessor
+from transformers.testing_utils import require_vision
+
+from ...test_processing_common import ProcessorTesterMixin
+
+
+@require_vision
+class PPChart2TableProcessorTest(ProcessorTesterMixin, unittest.TestCase):
+    """Processor tests for PPChart2TableProcessor built on the shared ProcessorTesterMixin suite."""
+
+    processor_class = PPChart2TableProcessor
+
+    @classmethod
+    def _setup_tokenizer(cls):
+        """Return the processor's tokenizer component loaded from the PP-Chart2Table checkpoint."""
+        tokenizer_cls = cls._get_component_class_from_processor("tokenizer")
+        return tokenizer_cls.from_pretrained("PaddlePaddle/PP-Chart2Table_safetensors")
+
+    def test_ocr_queries(self):
+        """Render a prompt from an empty user turn and check the shapes of the encoded text and image."""
+        processor = self.get_processor()
+        images = self.prepare_image_inputs()
+        messages = [{"role": "user", "content": []}]
+        # tokenize=False returns the rendered prompt, which is then passed as text next to the image.
+        prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+        encoded = processor(images=images, text=prompt, return_tensors="pt")
+        self.assertEqual(encoded["input_ids"].shape, (1, 287))
+        self.assertEqual(encoded["pixel_values"].shape, (1, 3, 1024, 1024))
+
+    def test_unstructured_kwargs_batched(self):
+        """Pass image and text kwargs flat to a batched processor call and check the rescale factor is applied."""
+        if "image_processor" not in self.processor_class.get_attributes():
+            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
+        processor = self.processor_class(**self.prepare_components(), **self.prepare_processor_dict())
+        self.skip_processor_without_typed_kwargs(processor)
+
+        texts = self.prepare_text_inputs(batch_size=2, modalities="image")
+        images = self.prepare_image_inputs(batch_size=2)
+        encoded = processor(
+            text=texts,
+            images=images,
+            return_tensors="pt",
+            do_rescale=True,
+            rescale_factor=-1.0,
+            padding="longest",
+            max_length=self.image_unstructured_max_length,
+        )
+
+        # With rescale_factor=-1.0 the mean of the first [0][0] slice is expected to be non-positive.
+        self.assertLessEqual(encoded[self.images_input_name][0][0].mean(), 0)
+
+    @unittest.skip(
+        reason="PPChart2Table relies on a heavily predetermined input format; chat template usage is not intended as expected"
+    )
+    def test_apply_chat_template_assistant_mask(self):
+        """Skipped; the reason is given in the decorator."""
+
+    @unittest.skip(
+        reason="PPChart2Table relies on a heavily predetermined input format; chat template usage is not intended as expected"
+    )
+    def test_apply_chat_template_image_0(self):
+        """Skipped; the reason is given in the decorator."""
+
+    @unittest.skip(
+        reason="PPChart2Table relies on a heavily predetermined input format; chat template usage is not intended as expected"
+    )
+    def test_apply_chat_template_image_1(self):
+        """Skipped; the reason is given in the decorator."""
diff --git a/utils/check_config_attributes.py b/utils/check_config_attributes.py
index 01b9a3c7ecb7..b1e968c71f56 100644
--- a/utils/check_config_attributes.py
+++ b/utils/check_config_attributes.py
@@ -138,6 +138,9 @@
"GptOssConfig": True,
"LwDetrConfig": True,
"NemotronHConfig": True,
+ # Internally uses GotOcr2, so these attributes need not be used in the modeling code; we remap in the auto classes instead
+ "PPChart2TableConfig": True,
+ "PPChart2TableVisionConfig": True,
}
# Common and important attributes, even if they do not always appear in the modeling files (can be a regex pattern)